diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index 62364fe..4985803 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -302,12 +302,15 @@
true
-
hive.llap.io.allocator.direct
false
+
+ hive.stats.column.autogather
+ true
+
hive.materializedview.rewriting
diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out
index c197152..bf4811b 100644
--- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out
+++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table2_h23.q.out
@@ -78,7 +78,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 500
rawDataSize 5312
@@ -176,7 +176,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 8
numRows 500
rawDataSize 5312
@@ -274,7 +274,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 8
numRows 500
rawDataSize 5312
@@ -372,7 +372,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 8
numRows 500
rawDataSize 5312
@@ -470,7 +470,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 500
rawDataSize 5312
@@ -568,7 +568,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 500
rawDataSize 5312
@@ -666,7 +666,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 500
rawDataSize 5312
@@ -764,7 +764,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 4
numRows 500
rawDataSize 5312
@@ -862,7 +862,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 500
rawDataSize 5312
diff --git a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out
index 3890aef..9f7d084 100644
--- a/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out
+++ b/ql/src/test/results/clientpositive/alter_numbuckets_partitioned_table_h23.q.out
@@ -87,7 +87,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 8
numRows 500
rawDataSize 5312
@@ -144,7 +144,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 12
numRows 500
rawDataSize 5312
@@ -272,7 +272,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 12
numRows 500
rawDataSize 5312
@@ -360,7 +360,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
#### A masked pattern was here ####
numFiles 12
numRows 500
diff --git a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
index daa6255..ace1293 100644
--- a/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
+++ b/ql/src/test/results/clientpositive/alter_partition_coltype.q.out
@@ -232,7 +232,7 @@ STAGE PLANS:
dt 100
ts 3.0
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -278,7 +278,7 @@ STAGE PLANS:
dt 100
ts 6.30
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value
@@ -469,7 +469,7 @@ STAGE PLANS:
partcol1 1
partcol2 1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}}
bucket_count -1
column.name.delimiter ,
columns intcol
@@ -538,7 +538,7 @@ STAGE PLANS:
partcol1 2
partcol2 1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"intcol":"true"}}
bucket_count -1
column.name.delimiter ,
columns intcol
diff --git a/ql/src/test/results/clientpositive/alter_table_add_partition.q.out b/ql/src/test/results/clientpositive/alter_table_add_partition.q.out
index 7b1075e..b3670fe 100644
--- a/ql/src/test/results/clientpositive/alter_table_add_partition.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_add_partition.q.out
@@ -196,7 +196,7 @@ Database: default
Table: mp
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
numFiles 1
numRows 1
rawDataSize 1
diff --git a/ql/src/test/results/clientpositive/alter_table_serde2.q.out b/ql/src/test/results/clientpositive/alter_table_serde2.q.out
index 9208ec3..d21fd74 100644
--- a/ql/src/test/results/clientpositive/alter_table_serde2.q.out
+++ b/ql/src/test/results/clientpositive/alter_table_serde2.q.out
@@ -78,7 +78,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 500
rawDataSize 5312
@@ -177,7 +177,7 @@ Database: default
Table: tst1
#### A masked pattern was here ####
Partition Parameters:
- COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
numFiles 1
numRows 500
rawDataSize 5312
diff --git a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out
index 2bfc04a..3a47edb 100644
--- a/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out
+++ b/ql/src/test/results/clientpositive/analyze_table_null_partition.q.out
@@ -102,7 +102,7 @@ STAGE PLANS:
partition values:
age 15
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}}
bucket_count -1
column.name.delimiter ,
columns name
@@ -147,7 +147,7 @@ STAGE PLANS:
partition values:
age 30
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}}
bucket_count -1
column.name.delimiter ,
columns name
@@ -192,7 +192,7 @@ STAGE PLANS:
partition values:
age 40
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}}
bucket_count -1
column.name.delimiter ,
columns name
@@ -237,7 +237,7 @@ STAGE PLANS:
partition values:
age __HIVE_DEFAULT_PARTITION__
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"name":"true"}}
bucket_count -1
column.name.delimiter ,
columns name
diff --git a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
index e22c3ef..bcb1dd2 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
@@ -66,11 +66,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: explain select * from loc_orc where state='OH'
@@ -87,17 +87,17 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
Filter Operator
predicate: (state = 'OH') (type: boolean)
- Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: 'OH' (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 398 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
index a8e4854..150e7f8 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
@@ -66,11 +66,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: analyze table loc_orc compute statistics for columns state
@@ -106,22 +106,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: state (type: string), locid (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -129,13 +129,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(_col1)
keys: _col0 (type: string), _col2 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -151,7 +151,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: bigint)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int)
Reduce Operator Tree:
Group By Operator
@@ -159,10 +159,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -743,30 +743,30 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), zip (type: bigint)
outputColumnNames: state, zip
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: state (type: string), zip (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/auto_join1.q.out b/ql/src/test/results/clientpositive/auto_join1.q.out
index 5f4bb74..dbd49a5 100644
--- a/ql/src/test/results/clientpositive/auto_join1.q.out
+++ b/ql/src/test/results/clientpositive/auto_join1.q.out
@@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:src1
@@ -44,7 +46,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -77,6 +79,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -93,6 +110,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest_j1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/auto_join14.q.out b/ql/src/test/results/clientpositive/auto_join14.q.out
index 1dd677c..affad1e 100644
--- a/ql/src/test/results/clientpositive/auto_join14.q.out
+++ b/ql/src/test/results/clientpositive/auto_join14.q.out
@@ -15,13 +15,15 @@ FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and
INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:src
@@ -44,7 +46,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -77,6 +79,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: c1, c2
+ Statistics: Num rows: 366 Data size: 3890 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 16), compute_stats(c2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -93,6 +110,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: c1, c2
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 100
INSERT OVERWRITE TABLE dest1 SELECT src.key, srcpart.value
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/auto_join17.q.out b/ql/src/test/results/clientpositive/auto_join17.q.out
index d39c36e..87158b7 100644
--- a/ql/src/test/results/clientpositive/auto_join17.q.out
+++ b/ql/src/test/results/clientpositive/auto_join17.q.out
@@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.*
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:src1
@@ -44,7 +46,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -77,6 +79,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: key1, value1, key2, value2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -93,6 +110,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key1, value1, key2, value2
+ Column Types: int, string, int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.*
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/auto_join19.q.out b/ql/src/test/results/clientpositive/auto_join19.q.out
index 3f70055..d492a03 100644
--- a/ql/src/test/results/clientpositive/auto_join19.q.out
+++ b/ql/src/test/results/clientpositive/auto_join19.q.out
@@ -17,13 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_1:src2
@@ -46,7 +48,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -79,6 +81,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -95,6 +112,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
diff --git a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
index 3f70055..d492a03 100644
--- a/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
+++ b/ql/src/test/results/clientpositive/auto_join19_inclause.q.out
@@ -17,13 +17,15 @@ INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_1:src2
@@ -46,7 +48,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -79,6 +81,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -95,6 +112,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value
where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11')
diff --git a/ql/src/test/results/clientpositive/auto_join2.q.out b/ql/src/test/results/clientpositive/auto_join2.q.out
index b17d344..0c3fa26 100644
--- a/ql/src/test/results/clientpositive/auto_join2.q.out
+++ b/ql/src/test/results/clientpositive/auto_join2.q.out
@@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key
INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-8 is a root stage
- Stage-6 depends on stages: Stage-8
- Stage-0 depends on stages: Stage-6
+ Stage-9 is a root stage
+ Stage-7 depends on stages: Stage-9
+ Stage-0 depends on stages: Stage-7
Stage-3 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-3, Stage-4
+ Stage-4 depends on stages: Stage-7
STAGE PLANS:
- Stage: Stage-8
+ Stage: Stage-9
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:src1
@@ -62,7 +64,7 @@ STAGE PLANS:
0 (UDFToDouble(_col0) + UDFToDouble(_col1)) (type: double)
1 UDFToDouble(_col0) (type: double)
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce
Map Operator Tree:
TableScan
@@ -103,6 +105,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 605 Data size: 6427 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -119,6 +136,35 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-10
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest_j2
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/auto_join25.q.out b/ql/src/test/results/clientpositive/auto_join25.q.out
index 534bdb6..7b790ef 100644
--- a/ql/src/test/results/clientpositive/auto_join25.q.out
+++ b/ql/src/test/results/clientpositive/auto_join25.q.out
@@ -33,11 +33,13 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
POSTHOOK: Output: default@dest1
POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
-RUN: Stage-6:CONDITIONAL
-RUN: Stage-7:MAPREDLOCAL
+RUN: Stage-7:CONDITIONAL
+RUN: Stage-8:MAPREDLOCAL
RUN: Stage-1:MAPRED
RUN: Stage-0:MOVE
+RUN: Stage-3:MAPRED
RUN: Stage-2:STATS
+RUN: Stage-10:COLUMNSTATS
PREHOOK: query: SELECT sum(hash(dest1.key,dest1.value)) FROM dest1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest1
@@ -64,8 +66,6 @@ PREHOOK: Input: default@src
PREHOOK: Output: default@dest_j2
FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask
ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask
-FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask
-ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask
POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key)
INSERT OVERWRITE TABLE dest_j2 SELECT src1.key, src3.value
POSTHOOK: type: QUERY
@@ -73,14 +73,16 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest_j2
POSTHOOK: Lineage: dest_j2.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest_j2.value SIMPLE [(src)src3.FieldSchema(name:value, type:string, comment:default), ]
-RUN: Stage-11:CONDITIONAL
-RUN: Stage-14:MAPREDLOCAL
+RUN: Stage-12:CONDITIONAL
+RUN: Stage-15:MAPREDLOCAL
RUN: Stage-1:MAPRED
-RUN: Stage-8:CONDITIONAL
-RUN: Stage-12:MAPREDLOCAL
-RUN: Stage-2:MAPRED
+RUN: Stage-9:CONDITIONAL
+RUN: Stage-13:MAPREDLOCAL
+RUN: Stage-7:MAPRED
RUN: Stage-0:MOVE
+RUN: Stage-4:MAPRED
RUN: Stage-3:STATS
+RUN: Stage-17:COLUMNSTATS
PREHOOK: query: SELECT sum(hash(dest_j2.key,dest_j2.value)) FROM dest_j2
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j2
@@ -105,8 +107,6 @@ INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@dest_j1
-FAILED: Execution Error, return code 3 from org.apache.hadoop.hive.ql.exec.mr.MapredLocalTask
-ATTEMPT: Execute BackupTask: org.apache.hadoop.hive.ql.exec.mr.MapRedTask
POSTHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest_j1 SELECT src1.key, src2.value
POSTHOOK: type: QUERY
@@ -114,11 +114,13 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest_j1
POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
-RUN: Stage-6:CONDITIONAL
-RUN: Stage-7:MAPREDLOCAL
-RUN: Stage-1:MAPRED
+RUN: Stage-7:CONDITIONAL
+RUN: Stage-8:MAPREDLOCAL
+RUN: Stage-5:MAPRED
RUN: Stage-0:MOVE
+RUN: Stage-3:MAPRED
RUN: Stage-2:STATS
+RUN: Stage-10:COLUMNSTATS
PREHOOK: query: SELECT sum(hash(dest_j1.key,dest_j1.value)) FROM dest_j1
PREHOOK: type: QUERY
PREHOOK: Input: default@dest_j1
diff --git a/ql/src/test/results/clientpositive/auto_join26.q.out b/ql/src/test/results/clientpositive/auto_join26.q.out
index b05145d..50340cd 100644
--- a/ql/src/test/results/clientpositive/auto_join26.q.out
+++ b/ql/src/test/results/clientpositive/auto_join26.q.out
@@ -15,13 +15,15 @@ INSERT OVERWRITE TABLE dest_j1
SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-2 depends on stages: Stage-6
+ Stage-7 is a root stage
+ Stage-2 depends on stages: Stage-7
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-3, Stage-4
+ Stage-4 depends on stages: Stage-2
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:$hdt$_0:x
@@ -98,6 +100,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest_j1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: key, cnt
+ Statistics: Num rows: 275 Data size: 2921 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(cnt, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -112,6 +129,35 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, cnt
+ Column Types: int, int
+ Table: default.dest_j1
+
+ Stage: Stage-4
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 952 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/auto_join3.q.out b/ql/src/test/results/clientpositive/auto_join3.q.out
index 35e8273..d230ae9 100644
--- a/ql/src/test/results/clientpositive/auto_join3.q.out
+++ b/ql/src/test/results/clientpositive/auto_join3.q.out
@@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key
INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage
- Stage-5 depends on stages: Stage-6
- Stage-0 depends on stages: Stage-5
+ Stage-7 is a root stage
+ Stage-6 depends on stages: Stage-7
+ Stage-0 depends on stages: Stage-6
Stage-2 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-6
STAGE PLANS:
- Stage: Stage-6
+ Stage: Stage-7
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:src1
@@ -64,7 +66,7 @@ STAGE PLANS:
1 _col0 (type: string)
2 _col0 (type: string)
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce
Map Operator Tree:
TableScan
@@ -99,6 +101,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 1100 Data size: 11686 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -115,6 +132,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.key, src3.value
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/auto_join4.q.out b/ql/src/test/results/clientpositive/auto_join4.q.out
index eacf056..a24687d 100644
--- a/ql/src/test/results/clientpositive/auto_join4.q.out
+++ b/ql/src/test/results/clientpositive/auto_join4.q.out
@@ -37,13 +37,15 @@ FROM (
INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_1:src2
@@ -66,7 +68,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -99,6 +101,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: c1, c2, c3, c4
+ Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -115,6 +132,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4
+ Column Types: int, string, int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM (
FROM
(
diff --git a/ql/src/test/results/clientpositive/auto_join5.q.out b/ql/src/test/results/clientpositive/auto_join5.q.out
index d526595..8df09ca 100644
--- a/ql/src/test/results/clientpositive/auto_join5.q.out
+++ b/ql/src/test/results/clientpositive/auto_join5.q.out
@@ -37,13 +37,15 @@ FROM (
INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:src1
@@ -66,7 +68,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -99,6 +101,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: c1, c2, c3, c4
+ Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -115,6 +132,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4
+ Column Types: int, string, int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM (
FROM
(
diff --git a/ql/src/test/results/clientpositive/auto_join6.q.out b/ql/src/test/results/clientpositive/auto_join6.q.out
index 53caf7d..d661908 100644
--- a/ql/src/test/results/clientpositive/auto_join6.q.out
+++ b/ql/src/test/results/clientpositive/auto_join6.q.out
@@ -40,6 +40,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-5 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -98,6 +100,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: c1, c2, c3, c4
+ Statistics: Num rows: 60 Data size: 642 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -112,6 +129,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-5
+ Column Stats Work
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4
+ Column Types: int, string, int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM (
FROM
(
diff --git a/ql/src/test/results/clientpositive/auto_join7.q.out b/ql/src/test/results/clientpositive/auto_join7.q.out
index aec4e82..8a23e3d 100644
--- a/ql/src/test/results/clientpositive/auto_join7.q.out
+++ b/ql/src/test/results/clientpositive/auto_join7.q.out
@@ -50,6 +50,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-6 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -126,6 +128,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string), _col4 (type: int), _col5 (type: string)
+ outputColumnNames: c1, c2, c3, c4, c5, c6
+ Statistics: Num rows: 121 Data size: 1284 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -140,6 +157,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-6
+ Column Stats Work
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4, c5, c6
+ Column Types: int, string, int, string, int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 2904 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 2916 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM (
FROM
(
diff --git a/ql/src/test/results/clientpositive/auto_join8.q.out b/ql/src/test/results/clientpositive/auto_join8.q.out
index ccbafba..8f3a02b 100644
--- a/ql/src/test/results/clientpositive/auto_join8.q.out
+++ b/ql/src/test/results/clientpositive/auto_join8.q.out
@@ -37,13 +37,15 @@ FROM (
INSERT OVERWRITE TABLE dest1 SELECT c.c1, c.c2, c.c3, c.c4 where c.c3 IS NULL AND c.c1 IS NOT NULL
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_1:src2
@@ -66,7 +68,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -102,6 +104,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: c1, c2, c3, c4
+ Statistics: Num rows: 30 Data size: 321 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -118,6 +135,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4
+ Column Types: int, string, int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM (
FROM
(
diff --git a/ql/src/test/results/clientpositive/auto_join9.q.out b/ql/src/test/results/clientpositive/auto_join9.q.out
index d7d7d18..6474ef0 100644
--- a/ql/src/test/results/clientpositive/auto_join9.q.out
+++ b/ql/src/test/results/clientpositive/auto_join9.q.out
@@ -15,13 +15,15 @@ FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:src1
@@ -44,7 +46,7 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -77,6 +79,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -93,6 +110,35 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.key, src2.value where src1.ds = '2008-04-08' and src1.hr = '12'
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
index 156be41..a7508c2 100644
--- a/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
+++ b/ql/src/test/results/clientpositive/auto_join_reordering_values.q.out
@@ -141,7 +141,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
bucket_count -1
column.name.delimiter ,
columns dealid,date,time,cityid,userid
@@ -162,7 +162,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
bucket_count -1
column.name.delimiter ,
columns dealid,date,time,cityid,userid
@@ -182,7 +182,7 @@ STAGE PLANS:
name: default.orderpayment_small
name: default.orderpayment_small
Truncated Path -> Alias:
- /orderpayment_small [$hdt$_0:orderpayment, $hdt$_1:dim_pay_date]
+ /orderpayment_small [$hdt$_1:orderpayment, $hdt$_2:dim_pay_date]
Needs Tagging: true
Reduce Operator Tree:
Join Operator
@@ -277,7 +277,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
bucket_count -1
column.name.delimiter ,
columns dealid,date,time,cityid,userid
@@ -298,7 +298,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
bucket_count -1
column.name.delimiter ,
columns dealid,date,time,cityid,userid
@@ -318,7 +318,7 @@ STAGE PLANS:
name: default.orderpayment_small
name: default.orderpayment_small
Truncated Path -> Alias:
- /orderpayment_small [$hdt$_2:deal]
+ /orderpayment_small [$hdt$_3:deal]
#### A masked pattern was here ####
Needs Tagging: true
Reduce Operator Tree:
@@ -414,7 +414,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
bucket_count -1
column.name.delimiter ,
columns dealid,date,time,cityid,userid
@@ -435,7 +435,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"cityid":"true","date":"true","dealid":"true","time":"true","userid":"true"}}
bucket_count -1
column.name.delimiter ,
columns dealid,date,time,cityid,userid
@@ -455,7 +455,7 @@ STAGE PLANS:
name: default.orderpayment_small
name: default.orderpayment_small
Truncated Path -> Alias:
- /orderpayment_small [$hdt$_3:order_city]
+ /orderpayment_small [$hdt$_4:order_city]
#### A masked pattern was here ####
Needs Tagging: true
Reduce Operator Tree:
@@ -551,7 +551,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}}
bucket_count -1
column.name.delimiter ,
columns userid
@@ -572,7 +572,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"userid":"true"}}
bucket_count -1
column.name.delimiter ,
columns userid
@@ -592,7 +592,7 @@ STAGE PLANS:
name: default.user_small
name: default.user_small
Truncated Path -> Alias:
- /user_small [$hdt$_4:user]
+ /user_small [$hdt$_0:user]
#### A masked pattern was here ####
Needs Tagging: true
Reduce Operator Tree:
diff --git a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
index 8c7658c..eda6619 100644
--- a/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
+++ b/ql/src/test/results/clientpositive/auto_sortmerge_join_13.q.out
@@ -70,8 +70,11 @@ STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-3, Stage-4, Stage-5
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5
Stage-1 depends on stages: Stage-2
Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-2
@@ -104,6 +107,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: k1, k2
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: struct), _col1 (type: struct)
Select Operator
expressions: _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
@@ -114,6 +127,30 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: k1, k2
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -128,6 +165,20 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: int, int
+ Table: default.dest1
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: string, string
+ Table: default.dest2
+
Stage: Stage-1
Move Operator
tables:
@@ -141,6 +192,25 @@ STAGE PLANS:
Stage: Stage-4
Stats-Aggr Operator
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: from (
SELECT a.key key1, a.value value1, b.key key2, b.value value2
FROM tbl1 a JOIN tbl2 b
@@ -247,8 +317,11 @@ STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-3, Stage-4, Stage-5
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5
Stage-1 depends on stages: Stage-2
Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-2
@@ -281,6 +354,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: k1, k2
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: struct), _col1 (type: struct)
Select Operator
expressions: _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
@@ -291,6 +374,30 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: k1, k2
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -305,6 +412,20 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: int, int
+ Table: default.dest1
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: string, string
+ Table: default.dest2
+
Stage: Stage-1
Move Operator
tables:
@@ -318,6 +439,25 @@ STAGE PLANS:
Stage: Stage-4
Stats-Aggr Operator
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: from (
SELECT a.key key1, a.value value1, b.key key2, b.value value2
FROM tbl1 a JOIN tbl2 b
@@ -424,8 +564,11 @@ STAGE DEPENDENCIES:
Stage-2 is a root stage
Stage-0 depends on stages: Stage-2
Stage-3 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-3, Stage-4, Stage-5
+ Stage-8 depends on stages: Stage-3, Stage-4, Stage-5
Stage-1 depends on stages: Stage-2
Stage-4 depends on stages: Stage-1
+ Stage-5 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-2
@@ -458,6 +601,16 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int)
+ outputColumnNames: k1, k2
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: struct), _col1 (type: struct)
Select Operator
expressions: _col1 (type: string), _col3 (type: string)
outputColumnNames: _col0, _col1
@@ -468,6 +621,30 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest2
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: string)
+ outputColumnNames: k1, k2
+ Group By Operator
+ aggregations: compute_stats(k1, 16), compute_stats(k2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-0
Move Operator
@@ -482,6 +659,20 @@ STAGE PLANS:
Stage: Stage-3
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: int, int
+ Table: default.dest1
+
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: k1, k2
+ Column Types: string, string
+ Table: default.dest2
+
Stage: Stage-1
Move Operator
tables:
@@ -495,6 +686,25 @@ STAGE PLANS:
Stage: Stage-4
Stats-Aggr Operator
+ Stage: Stage-5
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: from (
SELECT a.key key1, a.value value1, b.key key2, b.value value2
FROM tbl1 a JOIN tbl2 b
diff --git a/ql/src/test/results/clientpositive/binary_output_format.q.out b/ql/src/test/results/clientpositive/binary_output_format.q.out
index 96afeb6..c589541 100644
--- a/ql/src/test/results/clientpositive/binary_output_format.q.out
+++ b/ql/src/test/results/clientpositive/binary_output_format.q.out
@@ -54,6 +54,7 @@ STAGE DEPENDENCIES:
Stage-4
Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
Stage-2 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-2
Stage-3
Stage-5
Stage-6 depends on stages: Stage-5
@@ -117,6 +118,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: string)
+ outputColumnNames: mydata
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(mydata, 16)
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct)
+ auto parallelism: false
Path -> Alias:
#### A masked pattern was here ####
Path -> Partition:
@@ -168,6 +185,35 @@ STAGE PLANS:
name: default.src
Truncated Path -> Alias:
/src [src]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0
+ columns.types struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -211,6 +257,14 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: mydata
+ Column Types: string
+ Table: default.dest1
+ Is Table Level Stats: true
+
Stage: Stage-3
Map Reduce
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/bucket1.q.out b/ql/src/test/results/clientpositive/bucket1.q.out
index 78571aa..68f0c2b 100644
--- a/ql/src/test/results/clientpositive/bucket1.q.out
+++ b/ql/src/test/results/clientpositive/bucket1.q.out
@@ -18,6 +18,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -129,6 +131,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -163,6 +192,83 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucket1_1
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1
+ columns.types struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
PREHOOK: query: insert overwrite table bucket1_1
select * from src
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/bucket2.q.out b/ql/src/test/results/clientpositive/bucket2.q.out
index a8e9f90..fbc7ef7 100644
--- a/ql/src/test/results/clientpositive/bucket2.q.out
+++ b/ql/src/test/results/clientpositive/bucket2.q.out
@@ -18,6 +18,7 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -129,6 +130,41 @@ STAGE PLANS:
TotalFiles: 2
GatherStats: true
MultiFileSpray: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: complete
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct), _col1 (type: struct)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1
+ columns.types struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -163,6 +199,14 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucket2_1
+ Is Table Level Stats: true
+
PREHOOK: query: insert overwrite table bucket2_1
select * from src
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/bucket3.q.out b/ql/src/test/results/clientpositive/bucket3.q.out
index b1173e7..10ae348 100644
--- a/ql/src/test/results/clientpositive/bucket3.q.out
+++ b/ql/src/test/results/clientpositive/bucket3.q.out
@@ -18,6 +18,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -127,6 +129,34 @@ STAGE PLANS:
TotalFiles: 2
GatherStats: true
MultiFileSpray: true
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types string,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-0
Move Operator
@@ -160,6 +190,90 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.bucket3_1
+ Is Table Level Stats: false
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ null sort order: a
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types string,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types string,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:string
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
PREHOOK: query: insert overwrite table bucket3_1 partition (ds='1')
select * from src
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out
index ba79294..9f1bae8 100644
--- a/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out
+++ b/ql/src/test/results/clientpositive/bucket_map_join_spark1.q.out
@@ -117,13 +117,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
on a.key=b.key and b.ds="2008-04-08"
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:a
@@ -197,7 +199,7 @@ STAGE PLANS:
1 _col0 (type: int)
Position of Big Table: 1
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -257,6 +259,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -393,6 +422,83 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
@@ -451,13 +557,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
on a.key=b.key and b.ds="2008-04-08"
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:a
@@ -531,7 +639,7 @@ STAGE PLANS:
1 _col0 (type: int)
Position of Big Table: 1
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -570,7 +678,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -591,6 +699,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -704,7 +839,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -727,6 +862,83 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out
index 8974d4c..5e9eb51 100644
--- a/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out
+++ b/ql/src/test/results/clientpositive/bucket_map_join_spark2.q.out
@@ -101,13 +101,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
on a.key=b.key and b.ds="2008-04-08"
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_1:b
@@ -181,7 +183,7 @@ STAGE PLANS:
1 _col0 (type: int)
Position of Big Table: 0
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -241,6 +243,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -377,6 +406,83 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
@@ -435,13 +541,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
on a.key=b.key and b.ds="2008-04-08"
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_1:b
@@ -515,7 +623,7 @@ STAGE PLANS:
1 _col0 (type: int)
Position of Big Table: 0
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -554,7 +662,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -575,6 +683,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -688,7 +823,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -711,6 +846,83 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out
index 0453f99..3379726 100644
--- a/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out
+++ b/ql/src/test/results/clientpositive/bucket_map_join_spark3.q.out
@@ -101,13 +101,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
on a.key=b.key and b.ds="2008-04-08"
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:a
@@ -181,7 +183,7 @@ STAGE PLANS:
1 _col0 (type: int)
Position of Big Table: 1
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -241,6 +243,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -377,6 +406,83 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
@@ -435,13 +541,15 @@ from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
on a.key=b.key and b.ds="2008-04-08"
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
$hdt$_0:a
@@ -515,7 +623,7 @@ STAGE PLANS:
1 _col0 (type: int)
Position of Big Table: 1
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -554,7 +662,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -575,6 +683,33 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 60 Data size: 6393 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -688,7 +823,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -711,6 +846,83 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ GatherStats: false
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
+ Path -> Alias:
+#### A masked pattern was here ####
+ Path -> Partition:
+#### A masked pattern was here ####
+ Partition
+ base file name: -mr-10002
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ column.name.delimiter ,
+ columns _col0,_col1,_col2
+ columns.types struct,struct,struct
+ escape.delim \
+ serialization.lib org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+ Truncated Path -> Alias:
+#### A masked pattern was here ####
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
+
PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
select a.key, a.value, b.value
from srcbucket_mapjoin_part a join srcbucket_mapjoin_part_2 b
diff --git a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out
index 5743944..688fdfa 100644
--- a/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out
+++ b/ql/src/test/results/clientpositive/bucket_map_join_spark4.q.out
@@ -189,7 +189,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -212,7 +212,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -239,7 +239,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -262,7 +262,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -289,7 +289,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -312,7 +312,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -545,7 +545,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -568,7 +568,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -595,7 +595,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -618,7 +618,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -645,7 +645,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
@@ -668,7 +668,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
SORTBUCKETCOLSPREFIX TRUE
bucket_count 2
bucket_field_name key
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out
index 71b2924..406cca4 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin13.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin13.q.out
@@ -91,7 +91,7 @@ STAGE PLANS:
partition values:
part 1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -192,7 +192,7 @@ STAGE PLANS:
partition values:
part 1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name value
column.name.delimiter ,
@@ -241,7 +241,7 @@ STAGE PLANS:
partition values:
part 2
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -371,7 +371,7 @@ STAGE PLANS:
partition values:
part 1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -480,7 +480,7 @@ STAGE PLANS:
partition values:
part 2
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -615,7 +615,7 @@ STAGE PLANS:
partition values:
part 1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -724,7 +724,7 @@ STAGE PLANS:
partition values:
part 2
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -859,7 +859,7 @@ STAGE PLANS:
partition values:
part 1
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name key
column.name.delimiter ,
@@ -968,7 +968,7 @@ STAGE PLANS:
partition values:
part 2
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value":"true"}}
bucket_count 2
bucket_field_name key
column.name.delimiter ,
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
index b108607..7fe4aea 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin5.q.out
@@ -181,6 +181,7 @@ STAGE DEPENDENCIES:
Stage-5
Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
Stage-3 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-3
Stage-4
Stage-6
Stage-7 depends on stages: Stage-6
@@ -272,6 +273,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 121 Data size: 12786 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -376,6 +393,35 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcbucket_mapjoin_part/ds=2008-04-08 [b]
/srcbucket_mapjoin_part/ds=2008-04-09 [b]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-8
Conditional Operator
@@ -418,6 +464,14 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-10
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
Stage: Stage-4
Map Reduce
Map Operator Tree:
@@ -722,6 +776,7 @@ STAGE DEPENDENCIES:
Stage-5
Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
Stage-3 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-3
Stage-4
Stage-6
Stage-7 depends on stages: Stage-6
@@ -792,7 +847,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -813,6 +868,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -917,6 +988,35 @@ STAGE PLANS:
Truncated Path -> Alias:
/srcbucket_mapjoin_part_2/ds=2008-04-08 [b]
/srcbucket_mapjoin_part_2/ds=2008-04-09 [b]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-8
Conditional Operator
@@ -936,7 +1036,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -959,6 +1059,14 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-10
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
Stage: Stage-4
Map Reduce
Map Operator Tree:
@@ -973,7 +1081,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1003,7 +1111,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1024,7 +1132,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1060,7 +1168,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1090,7 +1198,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
@@ -1111,7 +1219,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- COLUMN_STATS_ACCURATE {"BASIC_STATS":"true"}
+ COLUMN_STATS_ACCURATE {"BASIC_STATS":"true","COLUMN_STATS":{"key":"true","value1":"true","value2":"true"}}
bucket_count -1
column.name.delimiter ,
columns key,value1,value2
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
index 4aa7f82..a4586ad 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out
@@ -82,6 +82,7 @@ STAGE DEPENDENCIES:
Stage-4
Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
Stage-2 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-2
Stage-3
Stage-5
Stage-6 depends on stages: Stage-5
@@ -212,6 +213,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 44 Data size: 4620 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -265,6 +282,35 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -307,6 +353,14 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-10
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
Stage: Stage-3
Map Reduce
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
index 09b5af9..fe36aa1 100644
--- a/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
+++ b/ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out
@@ -91,6 +91,7 @@ STAGE DEPENDENCIES:
Stage-4
Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
Stage-2 depends on stages: Stage-0
+ Stage-10 depends on stages: Stage-2
Stage-3
Stage-5
Stage-6 depends on stages: Stage-5
@@ -275,6 +276,22 @@ STAGE PLANS:
TotalFiles: 1
GatherStats: true
MultiFileSpray: false
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+ outputColumnNames: key, value1, value2
+ Statistics: Num rows: 63 Data size: 6736 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value1, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ tag: -1
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct)
+ auto parallelism: false
Local Work:
Map Reduce Local Work
Path -> Alias:
@@ -328,6 +345,35 @@ STAGE PLANS:
name: default.srcbucket_mapjoin
Truncated Path -> Alias:
/srcbucket_mapjoin [a]
+ Needs Tagging: false
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ GlobalTableId: 0
+#### A masked pattern was here ####
+ NumFilesPerFileSink: 1
+ Statistics: Num rows: 1 Data size: 1476 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ properties:
+ columns _col0,_col1,_col2
+ columns.types struct:struct:struct
+ escape.delim \
+ hive.serialization.extend.additional.nesting.levels true
+ serialization.escape.crlf true
+ serialization.format 1
+ serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ TotalFiles: 1
+ GatherStats: false
+ MultiFileSpray: false
Stage: Stage-7
Conditional Operator
@@ -370,6 +416,14 @@ STAGE PLANS:
Stats-Aggr Operator
#### A masked pattern was here ####
+ Stage: Stage-10
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value1, value2
+ Column Types: string, string, string
+ Table: default.bucketmapjoin_tmp_result
+ Is Table Level Stats: true
+
Stage: Stage-3
Map Reduce
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
index 165f0dc..132ec23 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_1.q.out
@@ -48,6 +48,7 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -84,6 +85,13 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT x.key, x.value from
(
@@ -153,6 +161,7 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -189,6 +198,13 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT * from
(
@@ -258,6 +274,7 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -294,6 +311,13 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
PREHOOK: query: EXPLAIN
INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT x.key+x.key, x.value from
@@ -312,6 +336,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -343,6 +369,22 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -359,6 +401,42 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: EXPLAIN
INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT x.k1, concat(x.v1, x.v1) from
@@ -377,6 +455,7 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -413,3 +492,10 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table2
+
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
index 11c7c39..6c3bf65 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_3.q.out
@@ -44,6 +44,7 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-3 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -80,6 +81,13 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: value, key
+ Column Types: string, int
+ Table: default.test_table2
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT x.value, x.key from
(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
@@ -151,6 +159,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -182,6 +192,22 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table2
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), '1' (type: string)
+ outputColumnNames: value, key, ds
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(value, 16), compute_stats(key, 16)
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -198,6 +224,42 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: value, key
+ Column Types: string, int
+ Table: default.test_table2
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: struct), _col2 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table2 PARTITION (ds = '1')
SELECT x.key, x.value from
(SELECT a.key, a.value FROM test_table1 a WHERE a.ds = '1')x
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
index 1d794c3..d581f24 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_4.q.out
@@ -65,39 +65,14 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
- Stage-7 has a backup stage: Stage-1
- Stage-4 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
- Stage-8 has a backup stage: Stage-1
- Stage-5 depends on stages: Stage-8
- Stage-1
+ Stage-5 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- b
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- b
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: int)
- 1 key (type: int)
-
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -106,7 +81,7 @@ STAGE PLANS:
Filter Operator
predicate: key is not null (type: boolean)
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
@@ -121,8 +96,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: int)
value expressions: _col2 (type: string)
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -134,6 +107,20 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), '1' (type: string)
+ outputColumnNames: key, key2, value, ds
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(key2, 16), compute_stats(value, 16)
+ keys: '1' (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -150,98 +137,37 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
- Stage: Stage-8
- Map Reduce Local Work
- Alias -> Map Local Tables:
- a
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- a
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 key (type: int)
- 1 key (type: int)
-
Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col7) (type: string)
- outputColumnNames: _col1, _col2
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- value expressions: _col2 (type: string)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, key2, value
+ Column Types: int, int, string
+ Table: default.test_table3
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 key (type: int)
- 1 key (type: int)
- outputColumnNames: _col0, _col1, _col7
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col7) (type: string)
- outputColumnNames: _col1, _col2
- Reduce Output Operator
- key expressions: _col1 (type: int)
- sort order: +
- Map-reduce partition columns: _col1 (type: int)
- value expressions: _col2 (type: string)
+ Reduce Output Operator
+ key expressions: '1' (type: string)
+ sort order: +
+ Map-reduce partition columns: '1' (type: string)
+ value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2)
+ keys: '1' (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), '1' (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, a.key, concat(a.value, b.value)
@@ -339,43 +265,14 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
- Stage-7 has a backup stage: Stage-1
- Stage-4 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
- Stage-8 has a backup stage: Stage-1
- Stage-5 depends on stages: Stage-8
- Stage-1
+ Stage-5 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_1:b
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_1:b
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -388,7 +285,7 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
@@ -400,8 +297,6 @@ STAGE PLANS:
sort order: +
Map-reduce partition columns: _col1 (type: string)
value expressions: _col0 (type: int)
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
@@ -413,6 +308,20 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -429,104 +338,37 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
- Stage: Stage-8
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:a
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:a
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int)
- outputColumnNames: _col0
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- value expressions: _col0 (type: int)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col1 (type: string)
- sort order: +
- Map-reduce partition columns: _col1 (type: string)
- value expressions: _col0 (type: int)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: struct), _col2 (type: struct)
Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, a.value
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
index 1e70105..fb90070 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_5.q.out
@@ -65,43 +65,14 @@ FROM test_table1 a JOIN test_table2 b
ON a.key = b.key WHERE a.ds = '1' and b.ds = '1'
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
- Stage-7 has a backup stage: Stage-1
- Stage-4 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
- Stage-8 has a backup stage: Stage-1
- Stage-5 depends on stages: Stage-8
- Stage-1
+ Stage-5 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_1:b
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_1:b
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -114,7 +85,7 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
@@ -129,8 +100,6 @@ STAGE PLANS:
sort order: -
Map-reduce partition columns: _col0 (type: int)
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -142,6 +111,20 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -158,110 +141,37 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
- Stage: Stage-8
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:a
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:a
- TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: b
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col4
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: -
- Map-reduce partition columns: _col0 (type: int)
- value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: a
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col4
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col4) (type: string)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: -
- Map-reduce partition columns: _col0 (type: int)
- value expressions: _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: struct), _col2 (type: struct)
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
@@ -346,43 +256,14 @@ JOIN
ON a.key = b.key
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-6 is a root stage , consists of Stage-7, Stage-8, Stage-1
- Stage-7 has a backup stage: Stage-1
- Stage-4 depends on stages: Stage-7
- Stage-0 depends on stages: Stage-1, Stage-4, Stage-5
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
- Stage-8 has a backup stage: Stage-1
- Stage-5 depends on stages: Stage-8
- Stage-1
+ Stage-5 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
- Stage: Stage-6
- Conditional Operator
-
- Stage: Stage-7
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_1:test_table2
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_1:test_table2
- TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
- Stage: Stage-4
+ Stage: Stage-1
Map Reduce
Map Operator Tree:
TableScan
@@ -395,7 +276,7 @@ STAGE PLANS:
expressions: key (type: int), value (type: string)
outputColumnNames: _col0, _col1
Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
+ Sorted Merge Bucket Map Join Operator
condition map:
Inner Join 0 to 1
keys:
@@ -410,8 +291,6 @@ STAGE PLANS:
sort order: -
Map-reduce partition columns: _col0 (type: int)
value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
Reduce Operator Tree:
Select Operator
expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
@@ -423,6 +302,20 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.test_table3
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), '1' (type: string)
+ outputColumnNames: key, value, ds
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ keys: ds (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -439,110 +332,37 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
- Stage: Stage-8
- Map Reduce Local Work
- Alias -> Map Local Tables:
- $hdt$_0:test_table1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- $hdt$_0:test_table1
- TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- HashTable Sink Operator
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
-
Stage: Stage-5
- Map Reduce
- Map Operator Tree:
- TableScan
- alias: test_table2
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 84 Data size: 736 Basic stats: COMPLETE Column stats: NONE
- Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: -
- Map-reduce partition columns: _col0 (type: int)
- value expressions: _col1 (type: string)
- Local Work:
- Map Reduce Local Work
- Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.test_table3
- Stage: Stage-1
+ Stage: Stage-3
Map Reduce
Map Operator Tree:
TableScan
- alias: test_table1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Filter Operator
- predicate: key is not null (type: boolean)
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Select Operator
- expressions: key (type: int), value (type: string)
- outputColumnNames: _col0, _col1
- Statistics: Num rows: 10 Data size: 70 Basic stats: COMPLETE Column stats: NONE
- Sorted Merge Bucket Map Join Operator
- condition map:
- Inner Join 0 to 1
- keys:
- 0 _col0 (type: int)
- 1 _col0 (type: int)
- outputColumnNames: _col0, _col1, _col3
- Select Operator
- expressions: _col0 (type: int), concat(_col1, _col3) (type: string)
- outputColumnNames: _col0, _col1
- Reduce Output Operator
- key expressions: _col0 (type: int)
- sort order: -
- Map-reduce partition columns: _col0 (type: int)
- value expressions: _col1 (type: string)
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ value expressions: _col1 (type: struct), _col2 (type: struct)
Reduce Operator Tree:
- Select Operator
- expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string)
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: default.test_table3
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2
+ Select Operator
+ expressions: _col1 (type: struct), _col2 (type: struct), _col0 (type: string)
+ outputColumnNames: _col0, _col1, _col2
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, concat(a.value, b.value)
diff --git a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out
index f3d3006..29f8896 100644
--- a/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out
+++ b/ql/src/test/results/clientpositive/bucketsortoptimize_insert_8.q.out
@@ -68,6 +68,7 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -116,6 +117,13 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, key2, value
+ Column Types: int, int, string
+ Table: default.test_table3
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT a.key, b.key, concat(a.value, b.value)
FROM test_table1 a JOIN test_table2 b
@@ -197,6 +205,7 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2
STAGE PLANS:
Stage: Stage-1
@@ -245,6 +254,13 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, key2, value
+ Column Types: int, int, string
+ Table: default.test_table3
+
PREHOOK: query: INSERT OVERWRITE TABLE test_table3 PARTITION (ds = '1')
SELECT b.key, a.key, concat(a.value, b.value)
FROM test_table1 a JOIN test_table2 b
diff --git a/ql/src/test/results/clientpositive/case_sensitivity.q.out b/ql/src/test/results/clientpositive/case_sensitivity.q.out
index b3969cc..0158826 100644
--- a/ql/src/test/results/clientpositive/case_sensitivity.q.out
+++ b/ql/src/test/results/clientpositive/case_sensitivity.q.out
@@ -20,6 +20,7 @@ STAGE DEPENDENCIES:
Stage-4
Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
Stage-2 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-2
Stage-3
Stage-5
Stage-6 depends on stages: Stage-5
@@ -46,6 +47,32 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string)
+ outputColumnNames: key, value
+ Statistics: Num rows: 3 Data size: 837 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(value, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-7
Conditional Operator
@@ -69,6 +96,13 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: int, string
+ Table: default.dest1
+
Stage: Stage-3
Map Reduce
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/cast1.q.out b/ql/src/test/results/clientpositive/cast1.q.out
index 1a246c0..90f3e0c 100644
--- a/ql/src/test/results/clientpositive/cast1.q.out
+++ b/ql/src/test/results/clientpositive/cast1.q.out
@@ -18,6 +18,7 @@ STAGE DEPENDENCIES:
Stage-4
Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
Stage-2 depends on stages: Stage-0
+ Stage-8 depends on stages: Stage-2
Stage-3
Stage-5
Stage-6 depends on stages: Stage-5
@@ -44,6 +45,32 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: boolean), _col6 (type: int)
+ outputColumnNames: c1, c2, c3, c4, c5, c6, c7
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16), compute_stats(c5, 16), compute_stats(c6, 16), compute_stats(c7, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 3348 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 3348 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4), compute_stats(VALUE._col5), compute_stats(VALUE._col6)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+ Statistics: Num rows: 1 Data size: 3372 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 3372 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Stage: Stage-7
Conditional Operator
@@ -67,6 +94,13 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-8
+ Column Stats Work
+ Column Stats Desc:
+ Columns: c1, c2, c3, c4, c5, c6, c7
+ Column Types: int, double, double, double, int, string, int
+ Table: default.dest1
+
Stage: Stage-3
Map Reduce
Map Operator Tree:
diff --git a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
index f260f03..d94cf30 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_annotate_stats_groupby.q.out
@@ -66,11 +66,11 @@ STAGE PLANS:
Processor Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
outputColumnNames: state, locid, zip, year
- Statistics: Num rows: 8 Data size: 796 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: analyze table loc_orc compute statistics for columns state
@@ -106,22 +106,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: sq1:loc_orc
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: state, locid
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 720 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: count()
keys: state (type: string), locid (type: int)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: int)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: int)
- Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: bigint)
Reduce Operator Tree:
Group By Operator
@@ -129,13 +129,13 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: int)
mode: mergepartial
outputColumnNames: state, locid, $f2
- Statistics: Num rows: 7 Data size: 658 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
aggregations: min(locid)
keys: state (type: string), $f2 (type: bigint)
mode: hash
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -151,7 +151,7 @@ STAGE PLANS:
key expressions: _col0 (type: string), _col1 (type: bigint)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: int)
Reduce Operator Tree:
Group By Operator
@@ -159,10 +159,10 @@ STAGE PLANS:
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: state, $f2, $f2_0
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 784 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -767,30 +767,30 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: loc_orc
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: state (type: string), zip (type: bigint)
outputColumnNames: state, zip
- Statistics: Num rows: 8 Data size: 688 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 8 Data size: 752 Basic stats: COMPLETE Column stats: COMPLETE
Group By Operator
keys: state (type: string), zip (type: bigint)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string), _col1 (type: bigint)
sort order: ++
Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint)
- Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Operator Tree:
Group By Operator
keys: KEY._col0 (type: string), KEY._col1 (type: bigint)
mode: mergepartial
outputColumnNames: state, zip
- Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4 Data size: 344 Basic stats: COMPLETE Column stats: PARTIAL
+ Statistics: Num rows: 4 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out
index b296280..f8fd963 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_auto_join17.q.out
@@ -15,13 +15,15 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.*
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
- Stage-5 is a root stage
- Stage-4 depends on stages: Stage-5
- Stage-0 depends on stages: Stage-4
+ Stage-6 is a root stage
+ Stage-5 depends on stages: Stage-6
+ Stage-0 depends on stages: Stage-5
Stage-2 depends on stages: Stage-0
+ Stage-7 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-5
STAGE PLANS:
- Stage: Stage-5
+ Stage: Stage-6
Map Reduce Local Work
Alias -> Map Local Tables:
src1
@@ -44,7 +46,7 @@ STAGE PLANS:
0 key (type: string)
1 key (type: string)
- Stage: Stage-4
+ Stage: Stage-5
Map Reduce
Map Operator Tree:
TableScan
@@ -77,6 +79,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: key1, value1, key2, value2
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key1, 16), compute_stats(value1, 16), compute_stats(key2, 16), compute_stats(value2, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Local Work:
Map Reduce Local Work
@@ -93,6 +110,39 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-7
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key1, value1, key2, value2
+ Column Types: int, string, int, string
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1936 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1944 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest1 SELECT src1.*, src2.*
PREHOOK: type: QUERY
diff --git a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
index d4d70bc..05ca4bd 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out
@@ -22,6 +22,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -65,6 +67,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+ outputColumnNames: key, c1, c2, c3, c4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -79,6 +96,39 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, c1, c2, c3, c4
+ Column Types: string, int, string, int, int
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 2464 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest1
SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value)
@@ -132,6 +182,8 @@ STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1
Stage-2 depends on stages: Stage-0
+ Stage-4 depends on stages: Stage-2, Stage-3
+ Stage-3 depends on stages: Stage-1
STAGE PLANS:
Stage: Stage-1
@@ -175,6 +227,21 @@ STAGE PLANS:
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
name: default.dest1
+ Select Operator
+ expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
+ outputColumnNames: key, c1, c2, c3, c4
+ Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ aggregations: compute_stats(key, 16), compute_stats(c1, 16), compute_stats(c2, 16), compute_stats(c3, 16), compute_stats(c4, 16)
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4
+ Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Stage: Stage-0
Move Operator
@@ -189,6 +256,39 @@ STAGE PLANS:
Stage: Stage-2
Stats-Aggr Operator
+ Stage: Stage-4
+ Column Stats Work
+ Column Stats Desc:
+ Columns: key, c1, c2, c3, c4
+ Column Types: string, int, string, int, int
+ Table: default.dest1
+
+ Stage: Stage-3
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 2452 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct