diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 5344f36887..a8dffe3dcd 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1676,6 +1676,8 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "Whether or not to use a binary search to find the entries in an index table that match the filter, where possible"),

     // Statistics
+    HIVESTATSNDVFACTOR("hive.stats.ndv.estimate.factor", 2L,
+        "Factor used to estimate the number of distinct values (NDV) of a column when column statistics are unavailable; NDV is estimated as numRows / factor."),
     HIVESTATSAUTOGATHER("hive.stats.autogather", true,
         "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."),
     HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", false,
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index f4a53df002..e26f6c726c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -516,6 +516,7 @@ minillaplocal.query.files=acid_globallimit.q,\
   join_nulls.q,\
   join_nullsafe.q,\
   join_is_not_distinct_from.q,\
+  join_reordering_no_stats.q,\
   leftsemijoin_mr.q,\
   limit_join_transpose.q,\
   lineage2.q,\
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index 1d49568c8c..ac89bd8212 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -302,7 +302,7 @@ private void updateColStats(Set<Integer> projIndxLst, boolean allowNullColumnFor
     if (!hiveTblMetadata.isPartitioned()) {
       // 2.1 Handle the case for unpartitioned table.
       hiveColStats = StatsUtils.getTableColumnStats(hiveTblMetadata, hiveNonPartitionCols,
-          nonPartColNamesThatRqrStats);
+          nonPartColNamesThatRqrStats, hiveConf);

       // 2.1.1 Record Column Names that we needed stats for but couldn't
       if (hiveColStats == null) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 76f7daeb1b..23715ae2e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -297,9 +297,12 @@ public static Statistics collectStatistics(HiveConf conf, PrunedPartitionList pa
       // add partition column stats
       addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList,
           emptyStats);
-      stats.addToColumnStats(emptyStats);
       stats.addToDataSize(getDataSizeFromColumnStats(nr, emptyStats));
       stats.updateColumnStatsState(deriveStatType(emptyStats, referencedColumns));
+
+      // no stats in the metastore: estimate column stats instead of leaving them empty
+      emptyStats = estimateStats(table, schema, neededColumns, conf, nr);
+      stats.addToColumnStats(emptyStats);
     } else {
       List<ColumnStatisticsObj> colStats = aggrStats.getColStats();
       if (colStats.size() != neededColumns.size()) {
@@ -780,6 +783,41 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab
     return cs;
   }

+  private static ColStatistics estimateColStats(long numRows, String colName, HiveConf conf) {
+    ColStatistics cs = new ColStatistics();
+    cs.setColumnName(colName);
+    long ndvFactor = HiveConf.getLongVar(conf, ConfVars.HIVESTATSNDVFACTOR);
+    // assume one distinct value for every ndvFactor rows
+    cs.setCountDistint(numRows / ndvFactor);
+    return cs;
+  }
+
+  private static List<ColStatistics> estimateStats(Table table, List<ColumnInfo> schema,
+      List<String> neededColumns, HiveConf conf, long numRows) {
+    List<ColStatistics> stats = new ArrayList<ColStatistics>(neededColumns.size());
+
+    // for a non-partitioned table, derive the row count from the on-disk data size
+    long nr = numRows;
+    if (!table.isPartitioned()) {
+      long ds = getDataSize(conf, table);
+      nr = getNumRows(conf, schema, neededColumns, table, ds);
+    }
+    for (int i = 0; i < neededColumns.size(); i++) {
+      ColStatistics cs = estimateColStats(nr, neededColumns.get(i), conf);
+      stats.add(cs);
+    }
+    return stats;
+  }
+
+  public static List<ColStatistics> getTableColumnStats(Table table, List<ColumnInfo> schema,
+      List<String> neededColumns, HiveConf hiveconf) {
+    List<ColStatistics> stats = getTableColumnStats(table, schema, neededColumns);
+    // fall back to estimated statistics when the metastore has none
+    if (stats.isEmpty()) {
+      stats = estimateStats(table, schema, neededColumns, hiveconf, 0);
+    }
+    return stats;
+  }
+
   /**
    * Get table level column statistics from metastore for needed columns
    * @param table
@@ -803,6 +841,7 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab
     try {
       List<ColumnStatisticsObj> colStat = Hive.get().getTableColumnStatistics(
          dbName, tabName, neededColsInTable);
+      stats = convertColStats(colStat, tabName);
     } catch (HiveException e) {
       LOG.error("Failed to retrieve table statistics: ", e);
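
Taken together, the StatsUtils changes reduce to one heuristic: when the metastore has no column statistics, assume NDV = numRows / hive.stats.ndv.estimate.factor. A minimal standalone sketch of that heuristic follows (the class and method names are illustrative and not part of the patch; only the config name and its default of 2 are):

// Illustrative sketch only; mirrors the fallback added to StatsUtils above.
public final class NdvEstimateSketch {

  // Factor plays the role of hive.stats.ndv.estimate.factor (default 2 in this patch).
  static long estimateNdv(long numRows, long ndvFactor) {
    // With no statistics, assume each distinct value occurs ndvFactor
    // times on average, so NDV = numRows / ndvFactor.
    return numRows / ndvFactor;
  }

  public static void main(String[] args) {
    // A 1000-row table with the default factor is assumed to have
    // 500 distinct values in each column.
    System.out.println(estimateNdv(1000L, 2L)); // prints 500
  }
}

Because the estimated NDV is never zero for a non-empty table, the optimizer gets a usable (if coarse) selectivity for join keys and can order joins instead of falling back to a cross product, which is what the new test below exercises.
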
diff --git a/ql/src/test/queries/clientpositive/join_reordering_no_stats.q b/ql/src/test/queries/clientpositive/join_reordering_no_stats.q
new file mode 100644
index 0000000000..157d8de7a3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_reordering_no_stats.q
@@ -0,0 +1,56 @@
+set hive.stats.autogather=false;
+
+create table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT,
+S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING);
+
+CREATE TABLE lineitem_nostats (L_ORDERKEY INT,
+                                L_PARTKEY INT,
+                                L_SUPPKEY INT,
+                                L_LINENUMBER INT,
+                                L_QUANTITY DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT DOUBLE,
+                                L_TAX DOUBLE,
+                                L_RETURNFLAG STRING,
+                                L_LINESTATUS STRING,
+                                l_shipdate STRING,
+                                L_COMMITDATE STRING,
+                                L_RECEIPTDATE STRING,
+                                L_SHIPINSTRUCT STRING,
+                                L_SHIPMODE STRING,
+                                L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|';
+
+CREATE TABLE part_nostats(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+);
+
+-- should not have cross join
+explain select count(1) from part,supplier,lineitem where p_partkey = l_partkey and s_suppkey = l_suppkey;
+
+CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile;
+
+LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA');
+LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK');
+LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK');
+
+-- partitioned table
+explain select count(1) from Employee_Part,supplier,lineitem where employeeID= l_partkey and s_suppkey = l_suppkey;
+
+drop table Employee_Part;
+drop table supplier;
+drop table lineitem_nostats;
+drop table part_nostats;
diff --git a/ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out b/ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out
new file mode 100644
index 0000000000..c41ef9a733
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/join_reordering_no_stats.q.out
@@ -0,0 +1,442 @@
+PREHOOK: query: create table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT,
+S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@supplier
+POSTHOOK: query: create table supplier (S_SUPPKEY INT, S_NAME STRING, S_ADDRESS STRING, S_NATIONKEY INT,
+S_PHONE STRING, S_ACCTBAL DOUBLE, S_COMMENT STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@supplier
+PREHOOK: query: CREATE TABLE lineitem_nostats (L_ORDERKEY INT,
+                                L_PARTKEY INT,
+                                L_SUPPKEY INT,
+                                L_LINENUMBER INT,
+                                L_QUANTITY DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT DOUBLE,
+                                L_TAX DOUBLE,
+                                L_RETURNFLAG STRING,
+                                L_LINESTATUS STRING,
+                                l_shipdate STRING,
+                                L_COMMITDATE STRING,
+                                L_RECEIPTDATE STRING,
+                                L_SHIPINSTRUCT STRING,
+                                L_SHIPMODE STRING,
+                                L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@lineitem_nostats
+POSTHOOK: query: CREATE TABLE lineitem_nostats (L_ORDERKEY INT,
+                                L_PARTKEY INT,
+                                L_SUPPKEY INT,
+                                L_LINENUMBER INT,
+                                L_QUANTITY DOUBLE,
+                                L_EXTENDEDPRICE DOUBLE,
+                                L_DISCOUNT DOUBLE,
+                                L_TAX DOUBLE,
+                                L_RETURNFLAG STRING,
+                                L_LINESTATUS STRING,
+                                l_shipdate STRING,
+                                L_COMMITDATE STRING,
+                                L_RECEIPTDATE STRING,
+                                L_SHIPINSTRUCT STRING,
+                                L_SHIPMODE STRING,
+                                L_COMMENT STRING)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@lineitem_nostats
+PREHOOK: query: CREATE TABLE part_nostats(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@part_nostats
+POSTHOOK: query: CREATE TABLE part_nostats(
+    p_partkey INT,
+    p_name STRING,
+    p_mfgr STRING,
+    p_brand STRING,
+    p_type STRING,
+    p_size INT,
+    p_container STRING,
+    p_retailprice DOUBLE,
+    p_comment STRING
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@part_nostats
+PREHOOK: query: explain select count(1) from part,supplier,lineitem where p_partkey = l_partkey and s_suppkey = l_suppkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from part,supplier,lineitem where p_partkey = l_partkey and s_suppkey = l_suppkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: lineitem
+                  Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean)
+                    Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: l_partkey (type: int), l_suppkey (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: supplier
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: s_suppkey is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: s_suppkey (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 6
+            Map Operator Tree:
+                TableScan
+                  alias: part
+                  Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: p_partkey is not null (type: boolean)
+                    Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: p_partkey (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 110 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 110 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 121 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(1)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@Employee_Part
+POSTHOOK: query: CREATE TABLE Employee_Part(employeeID int, employeeName String) partitioned by (employeeSalary double, country string)
+row format delimited fields terminated by '|' stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@Employee_Part
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='2000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=2000.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='4000.0', country='USA')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=4000.0/country=USA
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee2.dat" INTO TABLE Employee_Part partition(employeeSalary='3500.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3500.0/country=UK
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK')
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/employee.dat" INTO TABLE Employee_Part partition(employeeSalary='3000.0', country='UK')
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@employee_part
+POSTHOOK: Output: default@employee_part@employeesalary=3000.0/country=UK
+PREHOOK: query: explain select count(1) from Employee_Part,supplier,lineitem where employeeID= l_partkey and s_suppkey = l_suppkey
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select count(1) from Employee_Part,supplier,lineitem where employeeID= l_partkey and s_suppkey = l_suppkey
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Map 6 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: lineitem
+                  Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: (l_partkey is not null and l_suppkey is not null) (type: boolean)
+                    Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: l_partkey (type: int), l_suppkey (type: int)
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: int)
+                        Statistics: Num rows: 100 Data size: 800 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col0 (type: int)
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5
+            Map Operator Tree:
+                TableScan
+                  alias: supplier
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                  Filter Operator
+                    predicate: s_suppkey is not null (type: boolean)
+                    Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                    Select Operator
+                      expressions: s_suppkey (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 6
+            Map Operator Tree:
+                TableScan
+                  alias: employee_part
+                  Statistics: Num rows: 116 Data size: 1394 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: employeeid is not null (type: boolean)
+                    Statistics: Num rows: 116 Data size: 1394 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: employeeid (type: int)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 116 Data size: 1394 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: int)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: int)
+                        Statistics: Num rows: 116 Data size: 1394 Basic stats: COMPLETE Column stats: NONE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: int)
+                  1 _col0 (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 110 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 110 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+        Reducer 3
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col0 (type: int)
+                  1 _col0 (type: int)
+                Statistics: Num rows: 127 Data size: 1533 Basic stats: COMPLETE Column stats: NONE
+                Group By Operator
+                  aggregations: count(1)
+                  mode: hash
+                  outputColumnNames: _col0
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    sort order:
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col0 (type: bigint)
+        Reducer 4
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: drop table Employee_Part
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@employee_part
+PREHOOK: Output: default@employee_part
+POSTHOOK: query: drop table Employee_Part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@employee_part
+POSTHOOK: Output: default@employee_part
+PREHOOK: query: drop table supplier
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@supplier
+PREHOOK: Output: default@supplier
+POSTHOOK: query: drop table supplier
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@supplier
+POSTHOOK: Output: default@supplier
+PREHOOK: query: drop table lineitem_nostats
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@lineitem_nostats
+PREHOOK: Output: default@lineitem_nostats
+POSTHOOK: query: drop table lineitem_nostats
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@lineitem_nostats
+POSTHOOK: Output: default@lineitem_nostats
+PREHOOK: query: drop table part_nostats
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@part_nostats
+PREHOOK: Output: default@part_nostats
+POSTHOOK: query: drop table part_nostats
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@part_nostats
+POSTHOOK: Output: default@part_nostats
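
With the patch applied, the factor is an ordinary HiveConf setting, so its effect can be tuned per session without regathering statistics. A minimal sketch of reading it programmatically (the demo class is hypothetical; HiveConf, ConfVars.HIVESTATSNDVFACTOR, and getLongVar come from the patch and the existing Hive API):

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

// Illustrative only: reads the default of hive.stats.ndv.estimate.factor.
public final class NdvFactorDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    long factor = HiveConf.getLongVar(conf, ConfVars.HIVESTATSNDVFACTOR);
    System.out.println("hive.stats.ndv.estimate.factor = " + factor); // 2 by default
  }
}

Raising the factor lowers the estimated NDV and, under the usual 1/NDV selectivity assumption, increases estimated join cardinalities on columns that lack statistics.
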