diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index db8866d..22ac682 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -408,6 +408,7 @@ minimr.query.negative.files=cluster_tasklog_retrieval.q,\ spark.query.files=add_part_multiple.q \ alter_merge_orc.q \ alter_merge_stats_orc.q \ + auto_smb_mapjoin_14.q \ avro_compression_enabled_native.q \ avro_decimal_native.q \ bucket2.q \ @@ -426,18 +427,31 @@ spark.query.files=add_part_multiple.q \ escape_orderby1.q \ escape_sortby1.q \ groupby1.q \ + groupby10.q \ + groupby11.q \ groupby2.q \ groupby3.q \ groupby3_map.q \ groupby3_map_multi_distinct.q \ + groupby3_map_skew.q \ groupby3_noskew.q \ groupby3_noskew_multi_distinct.q \ groupby4.q \ + groupby7.q \ groupby7_map.q \ groupby7_map_multi_single_reducer.q \ groupby7_map_skew.q \ groupby7_noskew.q \ + groupby7_noskew_multi_single_reducer.q \ + groupby8.q \ + groupby8_map.q \ + groupby8_map_skew.q \ + groupby8_noskew.q \ + groupby9.q \ + groupby_complex_types.q \ + groupby_complex_types_multi_single_reducer.q \ groupby_cube1.q \ + groupby_multi_insert_common_distinct.q \ groupby_multi_single_reducer.q \ groupby_multi_single_reducer2.q \ groupby_multi_single_reducer3.q \ @@ -514,6 +528,7 @@ spark.query.files=add_part_multiple.q \ parallel.q \ parallel_join0.q, \ parallel_join1.q, \ + pcr.q \ ppd_multi_insert.q \ ppd_transform.q \ ptf_decimal.q \ @@ -536,9 +551,13 @@ spark.query.files=add_part_multiple.q \ script_env_var2.q \ script_pipe.q \ scriptfile1.q \ + smb_mapjoin_13.q \ + smb_mapjoin_15.q \ + smb_mapjoin_16.q \ sort.q \ spark_test.q \ subquery_multiinsert.q \ + table_access_keys_stats.q \ temp_table.q \ timestamp_1.q \ timestamp_2.q \ diff --git ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out new file mode 100644 index 0000000..e64d4fb --- /dev/null +++ ql/src/test/results/clientpositive/spark/auto_smb_mapjoin_14.q.out @@ -0,0 +1,2175 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE tbl1(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl1 +PREHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tbl2 +POSTHOOK: query: CREATE TABLE tbl2(key int, value string) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tbl2 +PREHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl1 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl1 +POSTHOOK: Lineage: tbl1.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ] +POSTHOOK: Lineage: tbl1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tbl2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table tbl2 +select * from src where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tbl2 +POSTHOOK: Lineage: tbl2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tbl2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of a sub-query. It should be converted to a sort-merge join +explain +select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator +
expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed as part of more than one sub-query. It should be converted to a sort-merge join +explain +select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + 
keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from +( + select key, count(*) from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 + group by key +) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +6 +PREHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. +explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- A join is being performed across different sub-queries, where a join is being performed in each of them. +-- Each sub-query should be converted to a sort-merge join. 
+explain +select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 3 (GROUP PARTITION-LEVEL SORT, 1), Reducer 8 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 7 <- Map 6 (GROUP PARTITION-LEVEL SORT, 1), Map 9 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 8 <- Reducer 7 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 6 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 9 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output 
Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col3 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 22 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} + 1 + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 8 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, 
b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select src1.key, src1.cnt1, src2.cnt1 from +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq1 group by key +) src1 +join +( + select key, count(*) as cnt1 from + ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key + ) subq2 group by key +) src2 +on src1.key = src2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +0 9 9 +2 1 1 +4 1 1 +5 9 9 +8 1 1 +9 1 1 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + 
Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. 
Since the sub-query only contains selects and filters, it should +-- be converted to a sort-merge join, although there is more than one level of sub-query +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked 
pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join tbl2 b + on subq2.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- Both the tables are nested sub-queries i.e. more than 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Both the tables are nested sub-queries i.e. more than 1 level of sub-query. +-- The join should be converted to a sort-merge join +explain +select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 + join + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq3 + where key < 6 + ) subq4 + on subq2.key = subq4.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +#### A masked pattern was here #### +20 +PREHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. +explain +select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- The subquery itself is being joined. Since the sub-query only contains selects and filters and the join key +-- is not getting modified, it should be converted to a sort-merge join. Note that the sub-query modifies one +-- item, but that is not part of the join key. 
+explain +select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 8) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 3 Data size: 13 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key 
= subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, concat(a.value, a.value) as value from tbl1 a where key < 8) subq1 + join + (select a.key as key, concat(a.value, a.value) as value from tbl2 a where key < 8) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side +-- join should be performed +explain +select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Since the join key is modified by the sub-query, neither sort-merge join nor bucketized map-side +-- join should be performed +explain +select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: (key + 1) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col0 is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) +
Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key +1 as key, concat(a.value, a.value) as value from tbl1 a) subq1 + join + (select a.key +1 as key, concat(a.value, a.value) as value from tbl2 a) subq2 + on subq1.key = subq2.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +22 +PREHOOK: query: -- One of the tables is a sub-query and the other is not. +-- It should be converted to a sort-merge join. +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- One of the tables is a sub-query and the other is not. +-- It should be converted to a sort-merge join. 
+explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join tbl2 a on subq1.key = a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern 
was here #### +20 +PREHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to a sort-merge join +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +POSTHOOK: query: -- There are more than 2 inputs to the join, all of them being sub-queries. +-- It should be converted to a sort-merge join +explain +select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on (subq1.key = subq2.key) + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1), Map 5 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Map 5 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key < 6) and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 3 Data size: 12 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 + 1 + 2 + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats:
NONE + Select Operator + Statistics: Num rows: 6 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from + (select a.key as key, a.value as value from tbl1 a where key < 6) subq1 + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq2 + on subq1.key = subq2.key + join + (select a.key as key, a.value as value from tbl2 a where key < 6) subq3 + on (subq1.key = subq3.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +56 +PREHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. +-- The join should be converted to a sort-merge join +explain +select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is being performed on a nested sub-query, and an aggregation is performed after that. 
+-- The join should be converted to a sort-merge join +explain +select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (((key < 8) and (key < 6)) and key is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +PREHOOK: type: QUERY +PREHOOK: Input: 
default@tbl1 +PREHOOK: Input: default@tbl2 +#### A masked pattern was here #### +POSTHOOK: query: select count(*) from ( + select subq2.key as key, subq2.value as value1, b.value as value2 from + ( + select * from + ( + select a.key as key, a.value as value from tbl1 a where key < 8 + ) subq1 + where key < 6 + ) subq2 +join tbl2 b +on subq2.key = b.key) a +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +#### A masked pattern was here #### +20 +PREHOOK: query: CREATE TABLE dest1(key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(key int, val1 string, val2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: -- The join is followed by a multi-table insert. It should be converted to +-- a sort-merge join +explain +from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, val1, val2 +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is followed by a multi-table insert. It should be converted to +-- a sort-merge join +explain +from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, val1, val2 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 3 (GROUP PARTITION-LEVEL SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {VALUE._col0} + outputColumnNames: _col0, _col1, _col6 + Statistics: Num rows: 0 Data size: 0 
Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, val1, val2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, val1, val2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 
SIMPLE [(tbl2)b.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +0 val_0 val_0 +2 val_2 val_2 +4 val_4 val_4 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +5 val_5 val_5 +8 val_8 val_8 +9 val_9 val_9 +PREHOOK: query: DROP TABLE dest2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@dest2 +PREHOOK: Output: default@dest2 +POSTHOOK: query: DROP TABLE dest2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@dest2 +POSTHOOK: Output: default@dest2 +PREHOOK: query: CREATE TABLE dest2(key int, cnt int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(key int, cnt int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. +-- It should be converted to a sort-merge join +explain +from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, count(*) group by key +PREHOOK: type: QUERY +POSTHOOK: query: -- The join is followed by a multi-table insert, and one of the inserts involves a reducer. 
+-- It should be converted to a sort-merge join +explain +from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, count(*) group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 0 Data size: 80 Basic stats: PARTIAL Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + value expressions: value (type: string) + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 20 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 10 Data size: 40 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Select Operator + expressions: _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 20 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, count(*) group by key +PREHOOK: type: QUERY +PREHOOK: Input: default@tbl1 +PREHOOK: Input: default@tbl2 +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from ( + select a.key as key, a.value as val1, b.value as val2 from tbl1 a join tbl2 b on a.key = b.key +) subq1 +insert overwrite table dest1 select key, val1 +insert overwrite table dest2 select key, count(*) group by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tbl1 +POSTHOOK: Input: default@tbl2 +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.value SIMPLE [(tbl1)a.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.cnt EXPRESSION [(tbl1)a.null, (tbl2)b.null, ] +POSTHOOK: Lineage: dest2.key SIMPLE [(tbl1)a.FieldSchema(name:key, type:int, comment:null), ] +PREHOOK: query: select * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +9 val_9 +PREHOOK: query: select * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 9 +2 1 +4 1 +5 9 +8 1 +9 1 
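The auto_smb_mapjoin_14.q.out expectations above exercise automatic sort-merge join conversion over two bucketed, sorted tables (tbl1 and tbl2, each CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS): a join inside a sub-query, a join over nested sub-queries, and joins feeding multi-table inserts. The .q file itself is not part of this diff; as a rough sketch only, a test of this shape typically depends on the standard SMB-join settings below — the exact values auto_smb_mapjoin_14.q uses are an assumption here, not taken from this patch:

    -- Hypothetical setup; the actual settings in auto_smb_mapjoin_14.q are not shown in this diff.
    set hive.execution.engine=spark;               -- run the query through Hive on Spark
    set hive.enforce.bucketing=true;               -- write bucketed output when populating tbl1/tbl2
    set hive.enforce.sorting=true;                 -- keep each bucket sorted on the join key
    set hive.auto.convert.sortmerge.join=true;     -- allow the optimizer to pick a sort-merge join
    set hive.optimize.bucketmapjoin=true;
    set hive.optimize.bucketmapjoin.sortedmerge=true;

Note that in the Spark plans recorded above, the join still appears as a reduce-side Join Operator behind GROUP PARTITION-LEVEL SORT edges rather than a converted SMB map join; the golden file simply records what the Spark branch produced at the time, while the in-query comments state the intended conversion.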
diff --git ql/src/test/results/clientpositive/spark/groupby10.q.out ql/src/test/results/clientpositive/spark/groupby10.q.out new file mode 100644 index 0000000..2095843 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby10.q.out @@ -0,0 +1,680 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(key INT, val1 INT, val2 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(key INT, val1 INT, val2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(key INT, val1 INT, val2 INT) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(key INT, val1 INT, val2 INT) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@INPUT +POSTHOOK: query: CREATE TABLE INPUT(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@INPUT +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE INPUT +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@input +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/kv5.txt' INTO TABLE INPUT +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@input +PREHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: input + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Group By Operator + aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@input +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +128 1 1 +150 1 1 +165 1 1 +193 1 1 +213 3 2 +224 1 1 +238 3 3 +255 1 1 +265 1 1 +27 1 1 +273 1 1 +278 1 1 +311 1 1 +369 1 1 +401 1 1 +409 1 1 +484 1 1 +66 1 1 +86 1 1 +98 1 1 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +128 128 128 +150 150 150 +165 165 165 +193 193 193 +213 640 427 +224 224 224 +238 717 717 +255 255 255 +265 265 265 +27 27 27 +273 273 273 +278 278 278 +311 311 311 +369 369 369 +401 401 401 +409 409 409 +484 484 484 +66 66 66 +86 86 86 +98 98 98 +PREHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: input + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Group By Operator + aggregations: sum(KEY._col0), sum(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, count(substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(substr(INPUT.value,5)), sum(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +128 1 1 +150 1 1 +165 1 1 +193 1 1 +213 3 2 +224 1 1 +238 3 3 +255 1 1 +265 1 1 +27 1 1 +273 1 1 +278 1 1 +311 1 1 +369 1 1 +401 1 1 +409 1 1 +484 1 1 +66 1 1 +86 1 1 +98 1 1 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +128 128 128 +150 150 150 +165 165 165 +193 193 193 +213 640 427 +224 224 224 +238 717 717 +255 255 255 +265 265 265 +27 27 27 +273 273 273 +278 278 278 +311 311 311 +369 369 369 +401 401 401 +409 409 409 +484 484 484 +66 66 66 +86 86 86 +98 98 98 +PREHOOK: query: -- HIVE-3852 Multi-groupby optimization fails when same distinct column is used twice or more +EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), 
avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +POSTHOOK: query: -- HIVE-3852 Multi-groupby optimization fails when same distinct column is used twice or more +EXPLAIN +FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: input + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: int) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(DISTINCT KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: bigint) + Group By Operator + aggregations: sum(DISTINCT KEY._col0), avg(DISTINCT KEY._col0) + keys: VALUE._col0 (type: int) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 280 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: struct) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: int) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), avg(VALUE._col1) + keys: KEY._col0 (type: int) 
+ mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 140 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +PREHOOK: type: QUERY +PREHOOK: Input: default@input +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM INPUT +INSERT OVERWRITE TABLE dest1 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), count(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +INSERT OVERWRITE TABLE dest2 SELECT INPUT.key, sum(distinct substr(INPUT.value,5)), avg(distinct substr(INPUT.value,5)) GROUP BY INPUT.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@input +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest1.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest1.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.key SIMPLE [(input)input.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: dest2.val1 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(input)input.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: SELECT * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +128 128 1 +150 150 1 +165 
165 1 +193 193 1 +213 427 2 +224 224 1 +238 717 3 +255 255 1 +265 265 1 +27 27 1 +273 273 1 +278 278 1 +311 311 1 +369 369 1 +401 401 1 +409 409 1 +484 484 1 +66 66 1 +86 86 1 +98 98 1 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +128 128 128 +150 150 150 +165 165 165 +193 193 193 +213 427 213 +224 224 224 +238 717 239 +255 255 255 +265 265 265 +27 27 27 +273 273 273 +278 278 278 +311 311 311 +369 369 369 +401 401 401 +409 409 409 +484 484 484 +66 66 66 +86 86 86 +98 98 98 diff --git ql/src/test/results/clientpositive/spark/groupby11.q.out ql/src/test/results/clientpositive/spark/groupby11.q.out new file mode 100644 index 0000000..70db5a5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby11.q.out @@ -0,0 +1,828 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(key STRING, val1 INT, val2 INT) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(key STRING, val1 INT, val2 INT) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: CREATE TABLE dest2(key STRING, val1 INT, val2 INT) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: CREATE TABLE dest2(key STRING, val1 INT, val2 INT) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string), substr(value, 5) (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: 
string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Group By Operator + aggregations: count(KEY._col0), count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + ds 111 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + partition: + ds 111 + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + 
SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1@ds=111 +PREHOOK: Output: default@dest2@ds=111 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 partition(ds='111') + SELECT src.value, count(src.key), count(distinct src.key) GROUP BY src.value +INSERT OVERWRITE TABLE dest2 partition(ds='111') + SELECT substr(src.value, 5), count(src.key), count(distinct src.key) GROUP BY substr(src.value, 5) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1@ds=111 +POSTHOOK: Output: default@dest2@ds=111 +POSTHOOK: Lineage: dest1 PARTITION(ds=111).key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).key EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).val1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2 PARTITION(ds=111).val2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT * from dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +PREHOOK: Input: default@dest1@ds=111 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +POSTHOOK: Input: default@dest1@ds=111 +#### A masked pattern was here #### +val_0 3 1 111 +val_10 1 1 111 +val_100 2 1 111 +val_103 2 1 111 +val_104 2 1 111 +val_105 1 1 111 +val_11 1 1 111 +val_111 1 1 111 +val_113 2 1 111 +val_114 1 1 111 +val_116 1 1 111 +val_118 2 1 111 +val_119 3 1 111 +val_12 2 1 111 +val_120 2 1 111 +val_125 2 1 111 +val_126 1 1 111 +val_128 3 1 111 +val_129 2 1 111 +val_131 1 1 111 +val_133 1 1 111 +val_134 2 1 111 +val_136 1 1 111 +val_137 2 1 111 +val_138 4 1 111 +val_143 1 1 111 +val_145 1 1 111 +val_146 2 1 111 +val_149 2 1 111 +val_15 2 1 111 +val_150 1 1 111 +val_152 2 1 111 +val_153 1 1 111 +val_155 1 1 111 +val_156 1 1 111 +val_157 1 1 111 +val_158 1 1 111 +val_160 1 1 111 +val_162 1 1 111 +val_163 1 1 111 +val_164 2 1 111 +val_165 2 1 111 +val_166 1 1 111 +val_167 3 1 111 +val_168 1 1 111 +val_169 4 1 111 +val_17 1 1 111 +val_170 1 1 111 +val_172 2 1 111 +val_174 2 1 111 +val_175 2 1 111 +val_176 2 1 111 +val_177 1 1 111 +val_178 1 1 111 +val_179 2 1 111 +val_18 2 1 111 +val_180 1 1 111 +val_181 1 1 111 +val_183 1 1 111 +val_186 1 1 111 +val_187 3 1 111 +val_189 1 1 111 +val_19 1 1 111 +val_190 1 1 111 +val_191 2 1 111 +val_192 1 1 111 +val_193 3 1 111 +val_194 1 1 111 +val_195 2 1 111 +val_196 1 1 111 +val_197 2 1 111 +val_199 3 1 111 +val_2 1 1 111 +val_20 1 1 111 +val_200 2 1 111 +val_201 1 1 111 +val_202 1 1 111 +val_203 2 1 111 +val_205 2 1 
111 +val_207 2 1 111 +val_208 3 1 111 +val_209 2 1 111 +val_213 2 1 111 +val_214 1 1 111 +val_216 2 1 111 +val_217 2 1 111 +val_218 1 1 111 +val_219 2 1 111 +val_221 2 1 111 +val_222 1 1 111 +val_223 2 1 111 +val_224 2 1 111 +val_226 1 1 111 +val_228 1 1 111 +val_229 2 1 111 +val_230 5 1 111 +val_233 2 1 111 +val_235 1 1 111 +val_237 2 1 111 +val_238 2 1 111 +val_239 2 1 111 +val_24 2 1 111 +val_241 1 1 111 +val_242 2 1 111 +val_244 1 1 111 +val_247 1 1 111 +val_248 1 1 111 +val_249 1 1 111 +val_252 1 1 111 +val_255 2 1 111 +val_256 2 1 111 +val_257 1 1 111 +val_258 1 1 111 +val_26 2 1 111 +val_260 1 1 111 +val_262 1 1 111 +val_263 1 1 111 +val_265 2 1 111 +val_266 1 1 111 +val_27 1 1 111 +val_272 2 1 111 +val_273 3 1 111 +val_274 1 1 111 +val_275 1 1 111 +val_277 4 1 111 +val_278 2 1 111 +val_28 1 1 111 +val_280 2 1 111 +val_281 2 1 111 +val_282 2 1 111 +val_283 1 1 111 +val_284 1 1 111 +val_285 1 1 111 +val_286 1 1 111 +val_287 1 1 111 +val_288 2 1 111 +val_289 1 1 111 +val_291 1 1 111 +val_292 1 1 111 +val_296 1 1 111 +val_298 3 1 111 +val_30 1 1 111 +val_302 1 1 111 +val_305 1 1 111 +val_306 1 1 111 +val_307 2 1 111 +val_308 1 1 111 +val_309 2 1 111 +val_310 1 1 111 +val_311 3 1 111 +val_315 1 1 111 +val_316 3 1 111 +val_317 2 1 111 +val_318 3 1 111 +val_321 2 1 111 +val_322 2 1 111 +val_323 1 1 111 +val_325 2 1 111 +val_327 3 1 111 +val_33 1 1 111 +val_331 2 1 111 +val_332 1 1 111 +val_333 2 1 111 +val_335 1 1 111 +val_336 1 1 111 +val_338 1 1 111 +val_339 1 1 111 +val_34 1 1 111 +val_341 1 1 111 +val_342 2 1 111 +val_344 2 1 111 +val_345 1 1 111 +val_348 5 1 111 +val_35 3 1 111 +val_351 1 1 111 +val_353 2 1 111 +val_356 1 1 111 +val_360 1 1 111 +val_362 1 1 111 +val_364 1 1 111 +val_365 1 1 111 +val_366 1 1 111 +val_367 2 1 111 +val_368 1 1 111 +val_369 3 1 111 +val_37 2 1 111 +val_373 1 1 111 +val_374 1 1 111 +val_375 1 1 111 +val_377 1 1 111 +val_378 1 1 111 +val_379 1 1 111 +val_382 2 1 111 +val_384 3 1 111 +val_386 1 1 111 +val_389 1 1 111 +val_392 1 1 111 +val_393 1 1 111 +val_394 1 1 111 +val_395 2 1 111 +val_396 3 1 111 +val_397 2 1 111 +val_399 2 1 111 +val_4 1 1 111 +val_400 1 1 111 +val_401 5 1 111 +val_402 1 1 111 +val_403 3 1 111 +val_404 2 1 111 +val_406 4 1 111 +val_407 1 1 111 +val_409 3 1 111 +val_41 1 1 111 +val_411 1 1 111 +val_413 2 1 111 +val_414 2 1 111 +val_417 3 1 111 +val_418 1 1 111 +val_419 1 1 111 +val_42 2 1 111 +val_421 1 1 111 +val_424 2 1 111 +val_427 1 1 111 +val_429 2 1 111 +val_43 1 1 111 +val_430 3 1 111 +val_431 3 1 111 +val_432 1 1 111 +val_435 1 1 111 +val_436 1 1 111 +val_437 1 1 111 +val_438 3 1 111 +val_439 2 1 111 +val_44 1 1 111 +val_443 1 1 111 +val_444 1 1 111 +val_446 1 1 111 +val_448 1 1 111 +val_449 1 1 111 +val_452 1 1 111 +val_453 1 1 111 +val_454 3 1 111 +val_455 1 1 111 +val_457 1 1 111 +val_458 2 1 111 +val_459 2 1 111 +val_460 1 1 111 +val_462 2 1 111 +val_463 2 1 111 +val_466 3 1 111 +val_467 1 1 111 +val_468 4 1 111 +val_469 5 1 111 +val_47 1 1 111 +val_470 1 1 111 +val_472 1 1 111 +val_475 1 1 111 +val_477 1 1 111 +val_478 2 1 111 +val_479 1 1 111 +val_480 3 1 111 +val_481 1 1 111 +val_482 1 1 111 +val_483 1 1 111 +val_484 1 1 111 +val_485 1 1 111 +val_487 1 1 111 +val_489 4 1 111 +val_490 1 1 111 +val_491 1 1 111 +val_492 2 1 111 +val_493 1 1 111 +val_494 1 1 111 +val_495 1 1 111 +val_496 1 1 111 +val_497 1 1 111 +val_498 3 1 111 +val_5 3 1 111 +val_51 2 1 111 +val_53 1 1 111 +val_54 1 1 111 +val_57 1 1 111 +val_58 2 1 111 +val_64 1 1 111 +val_65 1 1 111 +val_66 1 1 111 +val_67 2 1 111 +val_69 1 1 111 +val_70 3 1 111 +val_72 2 
1 111 +val_74 1 1 111 +val_76 2 1 111 +val_77 1 1 111 +val_78 1 1 111 +val_8 1 1 111 +val_80 1 1 111 +val_82 1 1 111 +val_83 2 1 111 +val_84 2 1 111 +val_85 1 1 111 +val_86 1 1 111 +val_87 1 1 111 +val_9 1 1 111 +val_90 3 1 111 +val_92 1 1 111 +val_95 2 1 111 +val_96 1 1 111 +val_97 2 1 111 +val_98 2 1 111 +PREHOOK: query: SELECT * from dest2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +PREHOOK: Input: default@dest2@ds=111 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * from dest2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +POSTHOOK: Input: default@dest2@ds=111 +#### A masked pattern was here #### +0 3 1 111 +10 1 1 111 +100 2 1 111 +103 2 1 111 +104 2 1 111 +105 1 1 111 +11 1 1 111 +111 1 1 111 +113 2 1 111 +114 1 1 111 +116 1 1 111 +118 2 1 111 +119 3 1 111 +12 2 1 111 +120 2 1 111 +125 2 1 111 +126 1 1 111 +128 3 1 111 +129 2 1 111 +131 1 1 111 +133 1 1 111 +134 2 1 111 +136 1 1 111 +137 2 1 111 +138 4 1 111 +143 1 1 111 +145 1 1 111 +146 2 1 111 +149 2 1 111 +15 2 1 111 +150 1 1 111 +152 2 1 111 +153 1 1 111 +155 1 1 111 +156 1 1 111 +157 1 1 111 +158 1 1 111 +160 1 1 111 +162 1 1 111 +163 1 1 111 +164 2 1 111 +165 2 1 111 +166 1 1 111 +167 3 1 111 +168 1 1 111 +169 4 1 111 +17 1 1 111 +170 1 1 111 +172 2 1 111 +174 2 1 111 +175 2 1 111 +176 2 1 111 +177 1 1 111 +178 1 1 111 +179 2 1 111 +18 2 1 111 +180 1 1 111 +181 1 1 111 +183 1 1 111 +186 1 1 111 +187 3 1 111 +189 1 1 111 +19 1 1 111 +190 1 1 111 +191 2 1 111 +192 1 1 111 +193 3 1 111 +194 1 1 111 +195 2 1 111 +196 1 1 111 +197 2 1 111 +199 3 1 111 +2 1 1 111 +20 1 1 111 +200 2 1 111 +201 1 1 111 +202 1 1 111 +203 2 1 111 +205 2 1 111 +207 2 1 111 +208 3 1 111 +209 2 1 111 +213 2 1 111 +214 1 1 111 +216 2 1 111 +217 2 1 111 +218 1 1 111 +219 2 1 111 +221 2 1 111 +222 1 1 111 +223 2 1 111 +224 2 1 111 +226 1 1 111 +228 1 1 111 +229 2 1 111 +230 5 1 111 +233 2 1 111 +235 1 1 111 +237 2 1 111 +238 2 1 111 +239 2 1 111 +24 2 1 111 +241 1 1 111 +242 2 1 111 +244 1 1 111 +247 1 1 111 +248 1 1 111 +249 1 1 111 +252 1 1 111 +255 2 1 111 +256 2 1 111 +257 1 1 111 +258 1 1 111 +26 2 1 111 +260 1 1 111 +262 1 1 111 +263 1 1 111 +265 2 1 111 +266 1 1 111 +27 1 1 111 +272 2 1 111 +273 3 1 111 +274 1 1 111 +275 1 1 111 +277 4 1 111 +278 2 1 111 +28 1 1 111 +280 2 1 111 +281 2 1 111 +282 2 1 111 +283 1 1 111 +284 1 1 111 +285 1 1 111 +286 1 1 111 +287 1 1 111 +288 2 1 111 +289 1 1 111 +291 1 1 111 +292 1 1 111 +296 1 1 111 +298 3 1 111 +30 1 1 111 +302 1 1 111 +305 1 1 111 +306 1 1 111 +307 2 1 111 +308 1 1 111 +309 2 1 111 +310 1 1 111 +311 3 1 111 +315 1 1 111 +316 3 1 111 +317 2 1 111 +318 3 1 111 +321 2 1 111 +322 2 1 111 +323 1 1 111 +325 2 1 111 +327 3 1 111 +33 1 1 111 +331 2 1 111 +332 1 1 111 +333 2 1 111 +335 1 1 111 +336 1 1 111 +338 1 1 111 +339 1 1 111 +34 1 1 111 +341 1 1 111 +342 2 1 111 +344 2 1 111 +345 1 1 111 +348 5 1 111 +35 3 1 111 +351 1 1 111 +353 2 1 111 +356 1 1 111 +360 1 1 111 +362 1 1 111 +364 1 1 111 +365 1 1 111 +366 1 1 111 +367 2 1 111 +368 1 1 111 +369 3 1 111 +37 2 1 111 +373 1 1 111 +374 1 1 111 +375 1 1 111 +377 1 1 111 +378 1 1 111 +379 1 1 111 +382 2 1 111 +384 3 1 111 +386 1 1 111 +389 1 1 111 +392 1 1 111 +393 1 1 111 +394 1 1 111 +395 2 1 111 +396 3 1 111 +397 2 1 111 +399 2 1 111 +4 1 1 111 +400 1 1 111 +401 5 1 111 +402 1 1 111 +403 3 1 111 +404 2 1 111 +406 4 1 111 +407 1 1 111 +409 3 1 111 +41 1 1 111 +411 1 1 111 +413 2 1 111 +414 2 1 111 +417 3 1 111 +418 1 1 111 +419 1 1 111 +42 2 1 111 +421 1 1 111 +424 2 1 111 +427 1 1 111 +429 2 1 111 +43 1 1 111 +430 3 1 111 
+431 3 1 111 +432 1 1 111 +435 1 1 111 +436 1 1 111 +437 1 1 111 +438 3 1 111 +439 2 1 111 +44 1 1 111 +443 1 1 111 +444 1 1 111 +446 1 1 111 +448 1 1 111 +449 1 1 111 +452 1 1 111 +453 1 1 111 +454 3 1 111 +455 1 1 111 +457 1 1 111 +458 2 1 111 +459 2 1 111 +460 1 1 111 +462 2 1 111 +463 2 1 111 +466 3 1 111 +467 1 1 111 +468 4 1 111 +469 5 1 111 +47 1 1 111 +470 1 1 111 +472 1 1 111 +475 1 1 111 +477 1 1 111 +478 2 1 111 +479 1 1 111 +480 3 1 111 +481 1 1 111 +482 1 1 111 +483 1 1 111 +484 1 1 111 +485 1 1 111 +487 1 1 111 +489 4 1 111 +490 1 1 111 +491 1 1 111 +492 2 1 111 +493 1 1 111 +494 1 1 111 +495 1 1 111 +496 1 1 111 +497 1 1 111 +498 3 1 111 +5 3 1 111 +51 2 1 111 +53 1 1 111 +54 1 1 111 +57 1 1 111 +58 2 1 111 +64 1 1 111 +65 1 1 111 +66 1 1 111 +67 2 1 111 +69 1 1 111 +70 3 1 111 +72 2 1 111 +74 1 1 111 +76 2 1 111 +77 1 1 111 +78 1 1 111 +8 1 1 111 +80 1 1 111 +82 1 1 111 +83 2 1 111 +84 2 1 111 +85 1 1 111 +86 1 1 111 +87 1 1 111 +9 1 1 111 +90 3 1 111 +92 1 1 111 +95 2 1 111 +96 1 1 111 +97 2 1 111 +98 2 1 111 diff --git ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out new file mode 100644 index 0000000..ec6439a --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby3_map_skew.q.out @@ -0,0 +1,163 @@ +PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string) + outputColumnNames: value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(substr(value, 5)), avg(substr(value, 5)), avg(DISTINCT substr(value, 5)), max(substr(value, 5)), min(substr(value, 5)), std(substr(value, 5)), stddev_samp(substr(value, 5)), variance(substr(value, 5)), var_samp(substr(value, 5)) + keys: substr(value, 5) (type: string) + mode: hash + outputColumnNames: 
_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: struct<count:bigint,sum:double,input:string>), _col4 (type: string), _col5 (type: string), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: struct<count:bigint,sum:double,variance:double>) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(DISTINCT KEY._col0:0._col0), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) + mode: partials + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double), _col1 (type: struct<count:bigint,sum:double,input:string>), _col2 (type: struct<count:bigint,sum:double,input:string>), _col3 (type: string), _col4 (type: string), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), avg(VALUE._col1), avg(VALUE._col2), max(VALUE._col3), min(VALUE._col4), std(VALUE._col5), stddev_samp(VALUE._col6), variance(VALUE._col7), var_samp(VALUE._col8) + mode: final + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), UDFToDouble(_col3) (type: double), UDFToDouble(_col4) (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-3 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT + sum(substr(src.value,5)), + avg(substr(src.value,5)), + avg(DISTINCT substr(src.value,5)), + max(substr(src.value,5)), + min(substr(src.value,5)), + std(substr(src.value,5)), + stddev_samp(substr(src.value,5)), + variance(substr(src.value,5)), + var_samp(substr(src.value,5)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c5 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c6 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c7 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c8 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c9 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT c1, c2, c3, c4, c5, c6, c7, ROUND(c8, 5), ROUND(c9, 5) FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT c1, c2, c3, c4, c5, c6, c7, ROUND(c8, 5), ROUND(c9, 5) FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +130091.0 260.182 256.10355987055016 98.0 0.0 142.92680950752379 143.06995106518903 20428.07288 20469.0109 diff --git ql/src/test/results/clientpositive/spark/groupby7.q.out ql/src/test/results/clientpositive/spark/groupby7.q.out new file mode 100644 index 0000000..01af804 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby7.q.out @@ -0,0 +1,674 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE 
TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 
448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 
444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 diff --git ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out new file mode 100644 index 0000000..36640ef --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby7_noskew_multi_single_reducer.q.out @@ -0,0 +1,228 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: string) + sort order: + + Map-reduce partition columns: key (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: substr(value, 5) (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + 
aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: true + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + 
Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, sum(SUBSTR(SRC.value,5)) GROUP BY SRC.key ORDER BY SRC.key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 diff --git ql/src/test/results/clientpositive/spark/groupby8.q.out ql/src/test/results/clientpositive/spark/groupby8.q.out new file mode 100644 index 0000000..d8295ce --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby8.q.out @@ -0,0 +1,1600 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: 
type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, 
_col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 
1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 
1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 
+128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 
1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 diff --git ql/src/test/results/clientpositive/spark/groupby8_map.q.out ql/src/test/results/clientpositive/spark/groupby8_map.q.out new file mode 100644 index 0000000..b9aa597 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby8_map.q.out @@ -0,0 +1,810 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 2 (GROUP, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats 
aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 diff --git ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out new file mode 100644 index 0000000..b9aa597 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby8_map_skew.q.out @@ -0,0 +1,810 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 
SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 2 (GROUP, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE 
Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 
1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 
+92 1 +95 1 +96 1 +97 1 +98 1 diff --git ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out new file mode 100644 index 0000000..b9aa597 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby8_noskew.q.out @@ -0,0 +1,810 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 31) + Reducer 4 <- Reducer 2 (GROUP, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + 
Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 
+238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 diff --git ql/src/test/results/clientpositive/spark/groupby9.q.out ql/src/test/results/clientpositive/spark/groupby9.q.out new file mode 100644 index 0000000..bec2346 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby9.q.out @@ -0,0 +1,3972 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key INT, val1 STRING, val2 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string), VALUE._col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + 
Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 
+378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 val_0 1 +10 val_10 1 +100 val_100 1 +103 val_103 1 +104 val_104 1 +105 val_105 1 +11 val_11 1 +111 val_111 1 +113 val_113 1 +114 val_114 1 +116 val_116 1 +118 val_118 1 +119 val_119 1 +12 val_12 1 +120 val_120 1 +125 val_125 1 +126 val_126 1 +128 val_128 1 +129 val_129 1 +131 val_131 1 +133 val_133 1 +134 val_134 1 +136 val_136 1 +137 val_137 1 +138 val_138 1 +143 val_143 1 +145 val_145 1 +146 val_146 1 +149 val_149 1 +15 val_15 1 +150 val_150 1 +152 val_152 1 +153 val_153 1 +155 val_155 1 +156 val_156 1 +157 val_157 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 +163 val_163 1 +164 val_164 1 +165 val_165 1 +166 val_166 1 +167 val_167 1 +168 val_168 1 +169 val_169 1 +17 val_17 1 +170 val_170 1 +172 val_172 1 +174 val_174 1 +175 val_175 1 +176 val_176 1 +177 val_177 1 +178 val_178 1 +179 val_179 1 +18 val_18 1 +180 val_180 1 +181 val_181 1 +183 val_183 1 +186 val_186 1 +187 val_187 1 +189 val_189 1 +19 val_19 1 +190 val_190 1 +191 val_191 1 +192 val_192 1 +193 val_193 1 +194 val_194 1 +195 val_195 1 +196 val_196 1 +197 val_197 1 +199 val_199 1 +2 val_2 1 +20 val_20 1 +200 val_200 1 +201 val_201 1 +202 val_202 1 +203 val_203 1 +205 val_205 1 +207 val_207 1 +208 val_208 1 +209 val_209 1 +213 val_213 1 +214 val_214 1 +216 val_216 1 +217 val_217 1 +218 val_218 1 +219 val_219 1 +221 val_221 1 +222 val_222 1 +223 val_223 1 +224 val_224 1 +226 val_226 1 +228 val_228 1 +229 val_229 1 +230 val_230 1 +233 val_233 1 +235 val_235 1 +237 val_237 1 +238 val_238 1 +239 val_239 1 +24 val_24 1 +241 val_241 1 +242 val_242 1 +244 val_244 1 +247 val_247 1 +248 val_248 1 +249 val_249 1 +252 val_252 1 +255 val_255 1 +256 val_256 1 +257 val_257 1 +258 val_258 1 +26 val_26 1 +260 val_260 1 +262 val_262 1 +263 val_263 1 +265 val_265 1 +266 val_266 1 +27 val_27 1 +272 val_272 1 +273 val_273 1 +274 val_274 1 +275 val_275 1 +277 val_277 1 +278 val_278 1 +28 val_28 1 +280 val_280 1 +281 val_281 1 +282 val_282 1 +283 val_283 1 +284 val_284 1 +285 val_285 1 +286 val_286 1 +287 val_287 1 +288 val_288 1 +289 val_289 1 +291 val_291 1 +292 val_292 1 +296 val_296 1 +298 val_298 1 +30 val_30 1 +302 val_302 1 +305 val_305 1 +306 val_306 1 +307 val_307 1 +308 val_308 1 +309 val_309 1 +310 val_310 1 +311 val_311 1 +315 val_315 1 +316 val_316 1 +317 val_317 1 +318 val_318 1 +321 val_321 1 +322 val_322 1 +323 val_323 1 +325 val_325 1 +327 val_327 1 +33 val_33 1 +331 val_331 1 +332 val_332 1 +333 val_333 1 +335 val_335 1 +336 val_336 1 +338 val_338 1 +339 val_339 1 +34 val_34 1 +341 val_341 
1 +342 val_342 1 +344 val_344 1 +345 val_345 1 +348 val_348 1 +35 val_35 1 +351 val_351 1 +353 val_353 1 +356 val_356 1 +360 val_360 1 +362 val_362 1 +364 val_364 1 +365 val_365 1 +366 val_366 1 +367 val_367 1 +368 val_368 1 +369 val_369 1 +37 val_37 1 +373 val_373 1 +374 val_374 1 +375 val_375 1 +377 val_377 1 +378 val_378 1 +379 val_379 1 +382 val_382 1 +384 val_384 1 +386 val_386 1 +389 val_389 1 +392 val_392 1 +393 val_393 1 +394 val_394 1 +395 val_395 1 +396 val_396 1 +397 val_397 1 +399 val_399 1 +4 val_4 1 +400 val_400 1 +401 val_401 1 +402 val_402 1 +403 val_403 1 +404 val_404 1 +406 val_406 1 +407 val_407 1 +409 val_409 1 +41 val_41 1 +411 val_411 1 +413 val_413 1 +414 val_414 1 +417 val_417 1 +418 val_418 1 +419 val_419 1 +42 val_42 1 +421 val_421 1 +424 val_424 1 +427 val_427 1 +429 val_429 1 +43 val_43 1 +430 val_430 1 +431 val_431 1 +432 val_432 1 +435 val_435 1 +436 val_436 1 +437 val_437 1 +438 val_438 1 +439 val_439 1 +44 val_44 1 +443 val_443 1 +444 val_444 1 +446 val_446 1 +448 val_448 1 +449 val_449 1 +452 val_452 1 +453 val_453 1 +454 val_454 1 +455 val_455 1 +457 val_457 1 +458 val_458 1 +459 val_459 1 +460 val_460 1 +462 val_462 1 +463 val_463 1 +466 val_466 1 +467 val_467 1 +468 val_468 1 +469 val_469 1 +47 val_47 1 +470 val_470 1 +472 val_472 1 +475 val_475 1 +477 val_477 1 +478 val_478 1 +479 val_479 1 +480 val_480 1 +481 val_481 1 +482 val_482 1 +483 val_483 1 +484 val_484 1 +485 val_485 1 +487 val_487 1 +489 val_489 1 +490 val_490 1 +491 val_491 1 +492 val_492 1 +493 val_493 1 +494 val_494 1 +495 val_495 1 +496 val_496 1 +497 val_497 1 +498 val_498 1 +5 val_5 1 +51 val_51 1 +53 val_53 1 +54 val_54 1 +57 val_57 1 +58 val_58 1 +64 val_64 1 +65 val_65 1 +66 val_66 1 +67 val_67 1 +69 val_69 1 +70 val_70 1 +72 val_72 1 +74 val_74 1 +76 val_76 1 +77 val_77 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 +82 val_82 1 +83 val_83 1 +84 val_84 1 +85 val_85 1 +86 val_86 1 +87 val_87 1 +9 val_9 1 +90 val_90 1 +92 val_92 1 +95 val_95 1 +96 val_96 1 +97 val_97 1 +98 val_98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string), VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, 
COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 
1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 val_0 1 +10 val_10 1 +100 val_100 1 +103 val_103 1 +104 val_104 1 +105 val_105 1 +11 val_11 1 +111 val_111 1 +113 val_113 1 +114 val_114 1 +116 val_116 1 +118 val_118 1 +119 val_119 1 +12 val_12 1 +120 val_120 1 +125 val_125 1 +126 val_126 1 +128 val_128 1 +129 val_129 1 +131 val_131 1 +133 val_133 1 +134 val_134 1 +136 val_136 1 +137 val_137 1 +138 val_138 1 +143 val_143 1 +145 val_145 1 +146 val_146 1 +149 val_149 1 +15 val_15 1 +150 val_150 1 +152 val_152 1 +153 val_153 1 +155 val_155 1 +156 val_156 1 +157 val_157 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 +163 val_163 1 +164 val_164 1 +165 val_165 1 +166 val_166 1 +167 val_167 1 +168 val_168 1 +169 val_169 1 +17 val_17 1 +170 val_170 1 +172 val_172 1 +174 val_174 1 +175 val_175 1 +176 val_176 1 +177 val_177 1 +178 val_178 1 +179 val_179 1 +18 val_18 1 +180 val_180 1 +181 val_181 1 +183 val_183 1 +186 val_186 1 +187 val_187 1 +189 val_189 1 +19 val_19 1 +190 val_190 1 +191 val_191 1 +192 val_192 1 +193 val_193 1 +194 val_194 1 +195 val_195 1 +196 val_196 1 +197 val_197 1 +199 val_199 1 +2 val_2 1 +20 val_20 1 +200 val_200 1 +201 val_201 1 +202 val_202 1 +203 val_203 1 +205 val_205 1 +207 val_207 1 +208 val_208 1 +209 val_209 1 +213 val_213 1 +214 val_214 1 +216 val_216 1 +217 val_217 1 +218 val_218 1 +219 val_219 1 +221 val_221 1 +222 val_222 1 +223 val_223 1 +224 val_224 1 +226 val_226 1 +228 val_228 1 +229 val_229 1 +230 val_230 1 +233 val_233 1 +235 val_235 1 +237 val_237 1 +238 val_238 1 +239 val_239 1 +24 val_24 1 +241 val_241 1 +242 val_242 1 +244 val_244 1 +247 val_247 1 +248 val_248 1 +249 val_249 1 +252 val_252 1 +255 val_255 1 +256 val_256 1 +257 val_257 1 +258 val_258 1 +26 val_26 1 +260 val_260 1 +262 val_262 1 +263 val_263 1 +265 val_265 1 +266 val_266 1 +27 val_27 1 +272 val_272 1 +273 val_273 1 +274 val_274 1 +275 val_275 1 +277 val_277 1 +278 val_278 1 +28 val_28 1 +280 val_280 1 +281 val_281 1 +282 val_282 1 +283 val_283 1 +284 val_284 1 +285 val_285 1 +286 val_286 1 +287 val_287 1 +288 val_288 1 +289 val_289 1 +291 val_291 1 +292 val_292 1 +296 val_296 1 +298 val_298 1 +30 val_30 1 +302 val_302 1 +305 val_305 1 +306 val_306 1 +307 val_307 1 +308 val_308 1 +309 val_309 1 +310 val_310 1 +311 val_311 1 +315 val_315 1 +316 val_316 1 +317 val_317 1 +318 val_318 1 +321 val_321 1 +322 val_322 1 +323 val_323 1 +325 val_325 1 +327 val_327 1 +33 val_33 1 +331 val_331 1 +332 val_332 1 +333 val_333 1 +335 val_335 1 +336 val_336 1 +338 val_338 1 +339 val_339 1 +34 val_34 1 +341 val_341 1 +342 val_342 1 +344 val_344 1 +345 val_345 1 +348 val_348 1 +35 val_35 1 +351 val_351 1 +353 val_353 1 +356 val_356 1 +360 val_360 1 +362 val_362 1 +364 val_364 1 +365 val_365 1 +366 val_366 1 +367 val_367 1 +368 val_368 1 +369 val_369 1 +37 val_37 1 +373 val_373 1 +374 val_374 1 +375 val_375 1 +377 val_377 1 +378 val_378 1 +379 val_379 1 +382 val_382 1 +384 val_384 1 +386 val_386 1 +389 val_389 1 +392 val_392 1 +393 val_393 1 +394 
val_394 1 +395 val_395 1 +396 val_396 1 +397 val_397 1 +399 val_399 1 +4 val_4 1 +400 val_400 1 +401 val_401 1 +402 val_402 1 +403 val_403 1 +404 val_404 1 +406 val_406 1 +407 val_407 1 +409 val_409 1 +41 val_41 1 +411 val_411 1 +413 val_413 1 +414 val_414 1 +417 val_417 1 +418 val_418 1 +419 val_419 1 +42 val_42 1 +421 val_421 1 +424 val_424 1 +427 val_427 1 +429 val_429 1 +43 val_43 1 +430 val_430 1 +431 val_431 1 +432 val_432 1 +435 val_435 1 +436 val_436 1 +437 val_437 1 +438 val_438 1 +439 val_439 1 +44 val_44 1 +443 val_443 1 +444 val_444 1 +446 val_446 1 +448 val_448 1 +449 val_449 1 +452 val_452 1 +453 val_453 1 +454 val_454 1 +455 val_455 1 +457 val_457 1 +458 val_458 1 +459 val_459 1 +460 val_460 1 +462 val_462 1 +463 val_463 1 +466 val_466 1 +467 val_467 1 +468 val_468 1 +469 val_469 1 +47 val_47 1 +470 val_470 1 +472 val_472 1 +475 val_475 1 +477 val_477 1 +478 val_478 1 +479 val_479 1 +480 val_480 1 +481 val_481 1 +482 val_482 1 +483 val_483 1 +484 val_484 1 +485 val_485 1 +487 val_487 1 +489 val_489 1 +490 val_490 1 +491 val_491 1 +492 val_492 1 +493 val_493 1 +494 val_494 1 +495 val_495 1 +496 val_496 1 +497 val_497 1 +498 val_498 1 +5 val_5 1 +51 val_51 1 +53 val_53 1 +54 val_54 1 +57 val_57 1 +58 val_58 1 +64 val_64 1 +65 val_65 1 +66 val_66 1 +67 val_67 1 +69 val_69 1 +70 val_70 1 +72 val_72 1 +74 val_74 1 +76 val_76 1 +77 val_77 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 +82 val_82 1 +83 val_83 1 +84 val_84 1 +85 val_85 1 +86 val_86 1 +87 val_87 1 +9 val_9 1 +90 val_90 1 +92 val_92 1 +95 val_95 1 +96 val_96 1 +97 val_97 1 +98 val_98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string), VALUE._col1 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter 
cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 val_0 1 +10 val_10 1 +100 val_100 1 +103 val_103 1 +104 val_104 1 +105 val_105 1 +11 val_11 1 +111 val_111 1 +113 val_113 1 +114 val_114 1 +116 val_116 1 +118 val_118 1 +119 val_119 1 +12 val_12 1 +120 val_120 1 +125 val_125 1 +126 val_126 1 +128 val_128 1 +129 val_129 1 +131 val_131 1 +133 val_133 1 +134 val_134 1 +136 val_136 1 +137 val_137 1 +138 val_138 1 +143 val_143 1 +145 val_145 1 +146 val_146 1 +149 val_149 1 +15 val_15 1 +150 val_150 1 +152 val_152 1 +153 val_153 1 +155 val_155 1 +156 val_156 1 +157 val_157 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 +163 val_163 1 +164 val_164 1 +165 val_165 1 +166 val_166 1 +167 val_167 1 +168 val_168 1 +169 val_169 1 +17 val_17 1 +170 val_170 1 +172 val_172 1 +174 val_174 1 +175 val_175 1 +176 val_176 1 +177 val_177 1 +178 val_178 1 +179 val_179 1 +18 val_18 1 +180 val_180 1 +181 val_181 1 +183 val_183 1 +186 val_186 1 +187 val_187 1 +189 val_189 1 +19 val_19 1 +190 val_190 1 +191 val_191 1 +192 val_192 1 +193 val_193 1 +194 val_194 1 +195 val_195 1 +196 val_196 1 +197 val_197 1 +199 val_199 1 +2 val_2 1 +20 val_20 1 +200 val_200 1 +201 val_201 1 +202 val_202 1 +203 val_203 1 +205 val_205 1 +207 val_207 1 +208 val_208 1 +209 val_209 1 +213 val_213 1 +214 val_214 1 +216 val_216 1 +217 val_217 1 +218 val_218 1 +219 val_219 1 +221 val_221 1 +222 val_222 1 +223 val_223 1 +224 val_224 1 +226 val_226 1 +228 val_228 1 +229 val_229 1 +230 val_230 1 +233 val_233 1 +235 val_235 1 +237 val_237 1 +238 val_238 1 +239 val_239 1 +24 val_24 1 +241 val_241 1 +242 val_242 1 +244 val_244 1 +247 val_247 1 +248 val_248 1 +249 val_249 1 +252 val_252 1 +255 val_255 1 +256 val_256 1 +257 val_257 1 +258 val_258 1 +26 val_26 1 +260 val_260 1 +262 val_262 1 +263 val_263 1 +265 val_265 1 +266 val_266 1 +27 val_27 1 +272 val_272 1 +273 val_273 1 +274 val_274 1 +275 val_275 1 +277 val_277 1 +278 val_278 1 +28 val_28 1 +280 val_280 1 +281 val_281 1 +282 val_282 1 +283 val_283 1 +284 val_284 1 +285 val_285 1 +286 val_286 1 +287 val_287 1 +288 val_288 1 +289 val_289 1 +291 val_291 1 +292 val_292 1 +296 val_296 1 +298 val_298 1 +30 val_30 1 +302 val_302 1 +305 val_305 1 +306 val_306 1 +307 val_307 1 +308 val_308 1 +309 val_309 1 +310 val_310 1 +311 val_311 1 +315 val_315 1 +316 val_316 1 +317 val_317 1 +318 val_318 1 +321 val_321 1 +322 val_322 1 +323 val_323 1 +325 val_325 1 +327 val_327 1 +33 val_33 1 +331 val_331 1 +332 val_332 1 +333 val_333 1 +335 val_335 1 +336 val_336 1 +338 val_338 1 +339 val_339 1 +34 val_34 1 +341 val_341 1 +342 val_342 1 +344 val_344 1 +345 val_345 1 +348 val_348 1 +35 val_35 1 +351 val_351 1 +353 val_353 1 +356 val_356 1 +360 val_360 1 +362 val_362 1 +364 val_364 1 +365 val_365 1 +366 val_366 1 +367 val_367 1 +368 val_368 1 +369 val_369 1 +37 val_37 1 +373 val_373 1 +374 val_374 1 +375 val_375 1 +377 val_377 1 +378 val_378 1 +379 val_379 1 +382 val_382 1 +384 val_384 1 +386 val_386 1 +389 val_389 1 +392 val_392 1 +393 val_393 1 +394 val_394 1 +395 val_395 1 +396 val_396 1 +397 val_397 1 +399 val_399 1 +4 val_4 1 +400 val_400 1 +401 val_401 1 +402 val_402 1 +403 val_403 1 +404 val_404 1 +406 val_406 1 +407 val_407 1 +409 val_409 1 +41 val_41 1 +411 val_411 1 +413 val_413 1 +414 val_414 1 +417 val_417 1 +418 val_418 1 +419 val_419 1 +42 val_42 1 +421 val_421 1 +424 val_424 1 +427 val_427 1 +429 val_429 1 +43 val_43 1 +430 val_430 1 +431 val_431 1 +432 val_432 1 
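A note on the constant counts in this dump: in the src test table every value is the string 'val_' concatenated with the key, so SUBSTR(value, 5) simply recovers the key, and any group keyed on key (or on key and value) holds exactly one distinct substring. The Stage-2 plan above also shows why a single shuffle suffices for both inserts: they aggregate the same DISTINCT expression, so Map 1 sorts once on substr(value, 5) and the Forward operator in Reducer 2 replays that one sorted stream into two Group By branches, one per destination table. A minimal verification sketch (hypothetical, not part of this golden file; assumes only the standard src table):

SELECT key, COUNT(DISTINCT SUBSTR(value, 5)) AS distinct_suffixes
FROM src
GROUP BY key
HAVING COUNT(DISTINCT SUBSTR(value, 5)) <> 1;
-- Expected to return no rows, since each key maps only to the value 'val_' || key.

The remaining DEST2 rows follow.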
+435 val_435 1 +436 val_436 1 +437 val_437 1 +438 val_438 1 +439 val_439 1 +44 val_44 1 +443 val_443 1 +444 val_444 1 +446 val_446 1 +448 val_448 1 +449 val_449 1 +452 val_452 1 +453 val_453 1 +454 val_454 1 +455 val_455 1 +457 val_457 1 +458 val_458 1 +459 val_459 1 +460 val_460 1 +462 val_462 1 +463 val_463 1 +466 val_466 1 +467 val_467 1 +468 val_468 1 +469 val_469 1 +47 val_47 1 +470 val_470 1 +472 val_472 1 +475 val_475 1 +477 val_477 1 +478 val_478 1 +479 val_479 1 +480 val_480 1 +481 val_481 1 +482 val_482 1 +483 val_483 1 +484 val_484 1 +485 val_485 1 +487 val_487 1 +489 val_489 1 +490 val_490 1 +491 val_491 1 +492 val_492 1 +493 val_493 1 +494 val_494 1 +495 val_495 1 +496 val_496 1 +497 val_497 1 +498 val_498 1 +5 val_5 1 +51 val_51 1 +53 val_53 1 +54 val_54 1 +57 val_57 1 +58 val_58 1 +64 val_64 1 +65 val_65 1 +66 val_66 1 +67 val_67 1 +69 val_69 1 +70 val_70 1 +72 val_72 1 +74 val_74 1 +76 val_76 1 +77 val_77 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 +82 val_82 1 +83 val_83 1 +84 val_84 1 +85 val_85 1 +86 val_86 1 +87 val_87 1 +9 val_9 1 +90 val_90 1 +92 val_92 1 +95 val_95 1 +96 val_96 1 +97 val_97 1 +98 val_98 1 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(substr(value, 5)) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(substr(value, 5)) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE 
Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(SUBSTR(SRC.value,5)) GROUP BY SRC.key, SRC.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION 
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 3 +10 1 +100 2 +103 2 +104 2 +105 1 +11 1 +111 1 +113 2 +114 1 +116 1 +118 2 +119 3 +12 2 +120 2 +125 2 +126 1 +128 3 +129 2 +131 1 +133 1 +134 2 +136 1 +137 2 +138 4 +143 1 +145 1 +146 2 +149 2 +15 2 +150 1 +152 2 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 2 +165 2 +166 1 +167 3 +168 1 +169 4 +17 1 +170 1 +172 2 +174 2 +175 2 +176 2 +177 1 +178 1 +179 2 +18 2 +180 1 +181 1 +183 1 +186 1 +187 3 +189 1 +19 1 +190 1 +191 2 +192 1 +193 3 +194 1 +195 2 +196 1 +197 2 +199 3 +2 1 +20 1 +200 2 +201 1 +202 1 +203 2 +205 2 +207 2 +208 3 +209 2 +213 2 +214 1 +216 2 +217 2 +218 1 +219 2 +221 2 +222 1 +223 2 +224 2 +226 1 +228 1 +229 2 +230 5 +233 2 +235 1 +237 2 +238 2 +239 2 +24 2 +241 1 +242 2 +244 1 +247 1 +248 1 +249 1 +252 1 +255 2 +256 2 +257 1 +258 1 +26 2 +260 1 +262 1 +263 1 +265 2 +266 1 +27 1 +272 2 +273 3 +274 1 +275 1 +277 4 +278 2 +28 1 +280 2 +281 2 +282 2 +283 1 +284 1 +285 1 +286 1 +287 1 +288 2 +289 1 +291 1 +292 1 +296 1 +298 3 +30 1 +302 1 +305 1 +306 1 +307 2 +308 1 +309 2 +310 1 +311 3 +315 1 +316 3 +317 2 +318 3 +321 2 +322 2 +323 1 +325 2 +327 3 +33 1 +331 2 +332 1 +333 2 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 2 +344 2 +345 1 +348 5 +35 3 +351 1 +353 2 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 2 +368 1 +369 3 +37 2 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 2 +384 3 +386 1 +389 1 +392 1 +393 1 +394 1 +395 2 +396 3 +397 2 +399 2 +4 1 +400 1 +401 5 +402 1 +403 3 +404 2 +406 4 +407 1 +409 3 +41 1 +411 1 +413 2 +414 2 +417 3 +418 1 +419 1 +42 2 +421 1 +424 2 +427 1 +429 2 +43 1 +430 3 +431 3 +432 1 +435 1 +436 1 +437 1 +438 3 +439 2 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 3 +455 1 +457 1 +458 2 +459 2 +460 1 +462 2 +463 2 +466 3 +467 1 +468 4 +469 5 +47 1 +470 1 +472 1 +475 1 +477 1 +478 2 +479 1 +480 3 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 4 +490 1 +491 1 +492 2 +493 1 +494 1 +495 1 +496 1 +497 1 +498 3 +5 3 +51 2 +53 1 +54 1 +57 1 +58 2 +64 1 +65 1 +66 1 +67 2 +69 1 +70 3 +72 2 +74 1 +76 2 +77 1 +78 1 +8 1 +80 1 +82 1 +83 2 +84 2 +85 1 +86 1 +87 1 +9 1 +90 3 +92 1 +95 2 +96 1 +97 2 +98 2 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 val_0 3 +10 val_10 1 +100 val_100 2 +103 val_103 2 +104 val_104 2 +105 val_105 1 +11 val_11 1 +111 val_111 1 +113 val_113 2 +114 val_114 1 +116 val_116 1 +118 val_118 2 +119 val_119 3 +12 val_12 2 +120 val_120 2 +125 val_125 2 +126 val_126 1 +128 val_128 3 +129 val_129 2 +131 val_131 1 +133 val_133 1 +134 val_134 2 +136 val_136 1 +137 val_137 2 +138 val_138 4 +143 val_143 1 +145 val_145 1 +146 val_146 2 +149 
val_149 2 +15 val_15 2 +150 val_150 1 +152 val_152 2 +153 val_153 1 +155 val_155 1 +156 val_156 1 +157 val_157 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 +163 val_163 1 +164 val_164 2 +165 val_165 2 +166 val_166 1 +167 val_167 3 +168 val_168 1 +169 val_169 4 +17 val_17 1 +170 val_170 1 +172 val_172 2 +174 val_174 2 +175 val_175 2 +176 val_176 2 +177 val_177 1 +178 val_178 1 +179 val_179 2 +18 val_18 2 +180 val_180 1 +181 val_181 1 +183 val_183 1 +186 val_186 1 +187 val_187 3 +189 val_189 1 +19 val_19 1 +190 val_190 1 +191 val_191 2 +192 val_192 1 +193 val_193 3 +194 val_194 1 +195 val_195 2 +196 val_196 1 +197 val_197 2 +199 val_199 3 +2 val_2 1 +20 val_20 1 +200 val_200 2 +201 val_201 1 +202 val_202 1 +203 val_203 2 +205 val_205 2 +207 val_207 2 +208 val_208 3 +209 val_209 2 +213 val_213 2 +214 val_214 1 +216 val_216 2 +217 val_217 2 +218 val_218 1 +219 val_219 2 +221 val_221 2 +222 val_222 1 +223 val_223 2 +224 val_224 2 +226 val_226 1 +228 val_228 1 +229 val_229 2 +230 val_230 5 +233 val_233 2 +235 val_235 1 +237 val_237 2 +238 val_238 2 +239 val_239 2 +24 val_24 2 +241 val_241 1 +242 val_242 2 +244 val_244 1 +247 val_247 1 +248 val_248 1 +249 val_249 1 +252 val_252 1 +255 val_255 2 +256 val_256 2 +257 val_257 1 +258 val_258 1 +26 val_26 2 +260 val_260 1 +262 val_262 1 +263 val_263 1 +265 val_265 2 +266 val_266 1 +27 val_27 1 +272 val_272 2 +273 val_273 3 +274 val_274 1 +275 val_275 1 +277 val_277 4 +278 val_278 2 +28 val_28 1 +280 val_280 2 +281 val_281 2 +282 val_282 2 +283 val_283 1 +284 val_284 1 +285 val_285 1 +286 val_286 1 +287 val_287 1 +288 val_288 2 +289 val_289 1 +291 val_291 1 +292 val_292 1 +296 val_296 1 +298 val_298 3 +30 val_30 1 +302 val_302 1 +305 val_305 1 +306 val_306 1 +307 val_307 2 +308 val_308 1 +309 val_309 2 +310 val_310 1 +311 val_311 3 +315 val_315 1 +316 val_316 3 +317 val_317 2 +318 val_318 3 +321 val_321 2 +322 val_322 2 +323 val_323 1 +325 val_325 2 +327 val_327 3 +33 val_33 1 +331 val_331 2 +332 val_332 1 +333 val_333 2 +335 val_335 1 +336 val_336 1 +338 val_338 1 +339 val_339 1 +34 val_34 1 +341 val_341 1 +342 val_342 2 +344 val_344 2 +345 val_345 1 +348 val_348 5 +35 val_35 3 +351 val_351 1 +353 val_353 2 +356 val_356 1 +360 val_360 1 +362 val_362 1 +364 val_364 1 +365 val_365 1 +366 val_366 1 +367 val_367 2 +368 val_368 1 +369 val_369 3 +37 val_37 2 +373 val_373 1 +374 val_374 1 +375 val_375 1 +377 val_377 1 +378 val_378 1 +379 val_379 1 +382 val_382 2 +384 val_384 3 +386 val_386 1 +389 val_389 1 +392 val_392 1 +393 val_393 1 +394 val_394 1 +395 val_395 2 +396 val_396 3 +397 val_397 2 +399 val_399 2 +4 val_4 1 +400 val_400 1 +401 val_401 5 +402 val_402 1 +403 val_403 3 +404 val_404 2 +406 val_406 4 +407 val_407 1 +409 val_409 3 +41 val_41 1 +411 val_411 1 +413 val_413 2 +414 val_414 2 +417 val_417 3 +418 val_418 1 +419 val_419 1 +42 val_42 2 +421 val_421 1 +424 val_424 2 +427 val_427 1 +429 val_429 2 +43 val_43 1 +430 val_430 3 +431 val_431 3 +432 val_432 1 +435 val_435 1 +436 val_436 1 +437 val_437 1 +438 val_438 3 +439 val_439 2 +44 val_44 1 +443 val_443 1 +444 val_444 1 +446 val_446 1 +448 val_448 1 +449 val_449 1 +452 val_452 1 +453 val_453 1 +454 val_454 3 +455 val_455 1 +457 val_457 1 +458 val_458 2 +459 val_459 2 +460 val_460 1 +462 val_462 2 +463 val_463 2 +466 val_466 3 +467 val_467 1 +468 val_468 4 +469 val_469 5 +47 val_47 1 +470 val_470 1 +472 val_472 1 +475 val_475 1 +477 val_477 1 +478 val_478 2 +479 val_479 1 +480 val_480 3 +481 val_481 1 +482 val_482 1 +483 val_483 1 +484 val_484 1 +485 val_485 1 +487 val_487 1 +489 val_489 4 +490 
val_490 1 +491 val_491 1 +492 val_492 2 +493 val_493 1 +494 val_494 1 +495 val_495 1 +496 val_496 1 +497 val_497 1 +498 val_498 3 +5 val_5 3 +51 val_51 2 +53 val_53 1 +54 val_54 1 +57 val_57 1 +58 val_58 2 +64 val_64 1 +65 val_65 1 +66 val_66 1 +67 val_67 2 +69 val_69 1 +70 val_70 3 +72 val_72 2 +74 val_74 1 +76 val_76 2 +77 val_77 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 +82 val_82 1 +83 val_83 2 +84 val_84 2 +85 val_85 1 +86 val_86 1 +87 val_87 1 +9 val_9 1 +90 val_90 3 +92 val_92 1 +95 val_95 2 +96 val_96 1 +97 val_97 2 +98 val_98 2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: substr(value, 5) (type: string) + sort order: + + Map-reduce partition columns: substr(value, 5) (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), value (type: string) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: string), VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string), _col1 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + 
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string), KEY._col1 (type: string) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT SRC.key, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.key +INSERT OVERWRITE TABLE DEST2 SELECT SRC.key, SRC.value, COUNT(DISTINCT SUBSTR(SRC.value,5)) GROUP BY SRC.value, SRC.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val1 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.val2 EXPRESSION 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +10 1 +100 1 +103 1 +104 1 +105 1 +11 1 +111 1 +113 1 +114 1 +116 1 +118 1 +119 1 +12 1 +120 1 +125 1 +126 1 +128 1 +129 1 +131 1 +133 1 +134 1 +136 1 +137 1 +138 1 +143 1 +145 1 +146 1 +149 1 +15 1 +150 1 +152 1 +153 1 +155 1 +156 1 +157 1 +158 1 +160 1 +162 1 +163 1 +164 1 +165 1 +166 1 +167 1 +168 1 +169 1 +17 1 +170 1 +172 1 +174 1 +175 1 +176 1 +177 1 +178 1 +179 1 +18 1 +180 1 +181 1 +183 1 +186 1 +187 1 +189 1 +19 1 +190 1 +191 1 +192 1 +193 1 +194 1 +195 1 +196 1 +197 1 +199 1 +2 1 +20 1 +200 1 +201 1 +202 1 +203 1 +205 1 +207 1 +208 1 +209 1 +213 1 +214 1 +216 1 +217 1 +218 1 +219 1 +221 1 +222 1 +223 1 +224 1 +226 1 +228 1 +229 1 +230 1 +233 1 +235 1 +237 1 +238 1 +239 1 +24 1 +241 1 +242 1 +244 1 +247 1 +248 1 +249 1 +252 1 +255 1 +256 1 +257 1 +258 1 +26 1 +260 1 +262 1 +263 1 +265 1 +266 1 +27 1 +272 1 +273 1 +274 1 +275 1 +277 1 +278 1 +28 1 +280 1 +281 1 +282 1 +283 1 +284 1 +285 1 +286 1 +287 1 +288 1 +289 1 +291 1 +292 1 +296 1 +298 1 +30 1 +302 1 +305 1 +306 1 +307 1 +308 1 +309 1 +310 1 +311 1 +315 1 +316 1 +317 1 +318 1 +321 1 +322 1 +323 1 +325 1 +327 1 +33 1 +331 1 +332 1 +333 1 +335 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 1 +344 1 +345 1 +348 1 +35 1 +351 1 +353 1 +356 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 1 +368 1 +369 1 +37 1 +373 1 +374 1 +375 1 +377 1 +378 1 +379 1 +382 1 +384 1 +386 1 +389 1 +392 1 +393 1 +394 1 +395 1 +396 1 +397 1 +399 1 +4 1 +400 1 +401 1 +402 1 +403 1 +404 1 +406 1 +407 1 +409 1 +41 1 +411 1 +413 1 +414 1 +417 1 +418 1 +419 1 +42 1 +421 1 +424 1 +427 1 +429 1 +43 1 +430 1 +431 1 +432 1 +435 1 +436 1 +437 1 +438 1 +439 1 +44 1 +443 1 +444 1 +446 1 +448 1 +449 1 +452 1 +453 1 +454 1 +455 1 +457 1 +458 1 +459 1 +460 1 +462 1 +463 1 +466 1 +467 1 +468 1 +469 1 +47 1 +470 1 +472 1 +475 1 +477 1 +478 1 +479 1 +480 1 +481 1 +482 1 +483 1 +484 1 +485 1 +487 1 +489 1 +490 1 +491 1 +492 1 +493 1 +494 1 +495 1 +496 1 +497 1 +498 1 +5 1 +51 1 +53 1 +54 1 +57 1 +58 1 +64 1 +65 1 +66 1 +67 1 +69 1 +70 1 +72 1 +74 1 +76 1 +77 1 +78 1 +8 1 +80 1 +82 1 +83 1 +84 1 +85 1 +86 1 +87 1 +9 1 +90 1 +92 1 +95 1 +96 1 +97 1 +98 1 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 val_0 1 +10 val_10 1 +100 val_100 1 +103 val_103 1 +104 val_104 1 +105 val_105 1 +11 val_11 1 +111 val_111 1 +113 val_113 1 +114 val_114 1 +116 val_116 1 +118 val_118 1 +119 val_119 1 +12 val_12 1 +120 val_120 1 +125 val_125 1 +126 val_126 1 +128 val_128 1 +129 val_129 1 +131 val_131 1 +133 val_133 1 +134 val_134 1 +136 val_136 1 +137 val_137 1 +138 val_138 1 +143 val_143 1 +145 val_145 1 +146 val_146 1 +149 val_149 1 +15 val_15 1 +150 val_150 1 +152 val_152 1 +153 val_153 1 +155 val_155 1 +156 val_156 1 +157 val_157 1 +158 val_158 1 +160 val_160 1 +162 val_162 1 +163 val_163 1 +164 val_164 1 +165 val_165 1 +166 val_166 1 +167 val_167 1 +168 val_168 1 +169 val_169 1 +17 val_17 1 +170 val_170 1 +172 val_172 1 +174 val_174 1 +175 val_175 1 +176 val_176 1 +177 val_177 1 +178 val_178 1 +179 val_179 1 +18 val_18 1 +180 val_180 1 +181 
val_181 1 +183 val_183 1 +186 val_186 1 +187 val_187 1 +189 val_189 1 +19 val_19 1 +190 val_190 1 +191 val_191 1 +192 val_192 1 +193 val_193 1 +194 val_194 1 +195 val_195 1 +196 val_196 1 +197 val_197 1 +199 val_199 1 +2 val_2 1 +20 val_20 1 +200 val_200 1 +201 val_201 1 +202 val_202 1 +203 val_203 1 +205 val_205 1 +207 val_207 1 +208 val_208 1 +209 val_209 1 +213 val_213 1 +214 val_214 1 +216 val_216 1 +217 val_217 1 +218 val_218 1 +219 val_219 1 +221 val_221 1 +222 val_222 1 +223 val_223 1 +224 val_224 1 +226 val_226 1 +228 val_228 1 +229 val_229 1 +230 val_230 1 +233 val_233 1 +235 val_235 1 +237 val_237 1 +238 val_238 1 +239 val_239 1 +24 val_24 1 +241 val_241 1 +242 val_242 1 +244 val_244 1 +247 val_247 1 +248 val_248 1 +249 val_249 1 +252 val_252 1 +255 val_255 1 +256 val_256 1 +257 val_257 1 +258 val_258 1 +26 val_26 1 +260 val_260 1 +262 val_262 1 +263 val_263 1 +265 val_265 1 +266 val_266 1 +27 val_27 1 +272 val_272 1 +273 val_273 1 +274 val_274 1 +275 val_275 1 +277 val_277 1 +278 val_278 1 +28 val_28 1 +280 val_280 1 +281 val_281 1 +282 val_282 1 +283 val_283 1 +284 val_284 1 +285 val_285 1 +286 val_286 1 +287 val_287 1 +288 val_288 1 +289 val_289 1 +291 val_291 1 +292 val_292 1 +296 val_296 1 +298 val_298 1 +30 val_30 1 +302 val_302 1 +305 val_305 1 +306 val_306 1 +307 val_307 1 +308 val_308 1 +309 val_309 1 +310 val_310 1 +311 val_311 1 +315 val_315 1 +316 val_316 1 +317 val_317 1 +318 val_318 1 +321 val_321 1 +322 val_322 1 +323 val_323 1 +325 val_325 1 +327 val_327 1 +33 val_33 1 +331 val_331 1 +332 val_332 1 +333 val_333 1 +335 val_335 1 +336 val_336 1 +338 val_338 1 +339 val_339 1 +34 val_34 1 +341 val_341 1 +342 val_342 1 +344 val_344 1 +345 val_345 1 +348 val_348 1 +35 val_35 1 +351 val_351 1 +353 val_353 1 +356 val_356 1 +360 val_360 1 +362 val_362 1 +364 val_364 1 +365 val_365 1 +366 val_366 1 +367 val_367 1 +368 val_368 1 +369 val_369 1 +37 val_37 1 +373 val_373 1 +374 val_374 1 +375 val_375 1 +377 val_377 1 +378 val_378 1 +379 val_379 1 +382 val_382 1 +384 val_384 1 +386 val_386 1 +389 val_389 1 +392 val_392 1 +393 val_393 1 +394 val_394 1 +395 val_395 1 +396 val_396 1 +397 val_397 1 +399 val_399 1 +4 val_4 1 +400 val_400 1 +401 val_401 1 +402 val_402 1 +403 val_403 1 +404 val_404 1 +406 val_406 1 +407 val_407 1 +409 val_409 1 +41 val_41 1 +411 val_411 1 +413 val_413 1 +414 val_414 1 +417 val_417 1 +418 val_418 1 +419 val_419 1 +42 val_42 1 +421 val_421 1 +424 val_424 1 +427 val_427 1 +429 val_429 1 +43 val_43 1 +430 val_430 1 +431 val_431 1 +432 val_432 1 +435 val_435 1 +436 val_436 1 +437 val_437 1 +438 val_438 1 +439 val_439 1 +44 val_44 1 +443 val_443 1 +444 val_444 1 +446 val_446 1 +448 val_448 1 +449 val_449 1 +452 val_452 1 +453 val_453 1 +454 val_454 1 +455 val_455 1 +457 val_457 1 +458 val_458 1 +459 val_459 1 +460 val_460 1 +462 val_462 1 +463 val_463 1 +466 val_466 1 +467 val_467 1 +468 val_468 1 +469 val_469 1 +47 val_47 1 +470 val_470 1 +472 val_472 1 +475 val_475 1 +477 val_477 1 +478 val_478 1 +479 val_479 1 +480 val_480 1 +481 val_481 1 +482 val_482 1 +483 val_483 1 +484 val_484 1 +485 val_485 1 +487 val_487 1 +489 val_489 1 +490 val_490 1 +491 val_491 1 +492 val_492 1 +493 val_493 1 +494 val_494 1 +495 val_495 1 +496 val_496 1 +497 val_497 1 +498 val_498 1 +5 val_5 1 +51 val_51 1 +53 val_53 1 +54 val_54 1 +57 val_57 1 +58 val_58 1 +64 val_64 1 +65 val_65 1 +66 val_66 1 +67 val_67 1 +69 val_69 1 +70 val_70 1 +72 val_72 1 +74 val_74 1 +76 val_76 1 +77 val_77 1 +78 val_78 1 +8 val_8 1 +80 val_80 1 +82 val_82 1 +83 val_83 1 +84 val_84 1 +85 val_85 1 +86 
val_86 1 +87 val_87 1 +9 val_9 1 +90 val_90 1 +92 val_92 1 +95 val_95 1 +96 val_96 1 +97 val_97 1 +98 val_98 1 diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out new file mode 100644 index 0000000..16fadea --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby_complex_types.q.out @@ -0,0 +1,1193 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key ARRAY<STRING>, value BIGINT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key ARRAY<STRING>, value BIGINT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key MAP<STRING, STRING>, value BIGINT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key MAP<STRING, STRING>, value BIGINT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: CREATE TABLE DEST3(key STRUCT<col1:STRING, col2:STRING>, value BIGINT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST3 +POSTHOOK: query: CREATE TABLE DEST3(key STRUCT<col1:STRING, col2:STRING>, value BIGINT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST3 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) +INSERT OVERWRITE TABLE DEST3 SELECT STRUCT(SRC.key, SRC.value), COUNT(1) GROUP BY STRUCT(SRC.key, SRC.value) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) +INSERT OVERWRITE TABLE DEST3 SELECT STRUCT(SRC.key, SRC.value), COUNT(1) GROUP BY STRUCT(SRC.key, SRC.value) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-3 is a root stage + Stage-4 depends on stages: Stage-3 + Stage-0 depends on stages: Stage-4 + Stage-5 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-4 + Stage-6 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-4 + Stage-7 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-3 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Map 1 (GROUP, 1) + Reducer 4 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: array(key) (type: array<string>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: array<string>) + sort order: + + Map-reduce partition columns: _col0 (type: array<string>) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: map(key:value) (type: map<string,string>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: map<string,string>) + sort order: + + Map-reduce partition columns: _col0 (type: map<string,string>) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: struct(key,value) (type: struct<col1:string,col2:string>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: struct<col1:string,col2:string>) + sort order: + + Map-reduce partition columns: _col0 (type: struct<col1:string,col2:string>) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: array<string>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: array<string>), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: map<string,string>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: map<string,string>), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: struct<col1:string,col2:string>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: struct<col1:string,col2:string>), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat +
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + Stage: Stage-4 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-5 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-6 + Stats-Aggr Operator + + Stage: Stage-2 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest3 + + Stage: Stage-7 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) +INSERT OVERWRITE TABLE DEST3 SELECT STRUCT(SRC.key, SRC.value), COUNT(1) GROUP BY STRUCT(SRC.key, SRC.value) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +PREHOOK: Output: default@dest3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) +INSERT OVERWRITE TABLE DEST3 SELECT STRUCT(SRC.key, SRC.value), COUNT(1) GROUP BY STRUCT(SRC.key, SRC.value) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Output: default@dest3 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest3.value EXPRESSION [(src)src.null, ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +["0"] 
3 +["10"] 1 +["100"] 2 +["103"] 2 +["104"] 2 +["105"] 1 +["11"] 1 +["111"] 1 +["113"] 2 +["114"] 1 +["116"] 1 +["118"] 2 +["119"] 3 +["12"] 2 +["120"] 2 +["125"] 2 +["126"] 1 +["128"] 3 +["129"] 2 +["131"] 1 +["133"] 1 +["134"] 2 +["136"] 1 +["137"] 2 +["138"] 4 +["143"] 1 +["145"] 1 +["146"] 2 +["149"] 2 +["15"] 2 +["150"] 1 +["152"] 2 +["153"] 1 +["155"] 1 +["156"] 1 +["157"] 1 +["158"] 1 +["160"] 1 +["162"] 1 +["163"] 1 +["164"] 2 +["165"] 2 +["166"] 1 +["167"] 3 +["168"] 1 +["169"] 4 +["17"] 1 +["170"] 1 +["172"] 2 +["174"] 2 +["175"] 2 +["176"] 2 +["177"] 1 +["178"] 1 +["179"] 2 +["18"] 2 +["180"] 1 +["181"] 1 +["183"] 1 +["186"] 1 +["187"] 3 +["189"] 1 +["19"] 1 +["190"] 1 +["191"] 2 +["192"] 1 +["193"] 3 +["194"] 1 +["195"] 2 +["196"] 1 +["197"] 2 +["199"] 3 +["2"] 1 +["20"] 1 +["200"] 2 +["201"] 1 +["202"] 1 +["203"] 2 +["205"] 2 +["207"] 2 +["208"] 3 +["209"] 2 +["213"] 2 +["214"] 1 +["216"] 2 +["217"] 2 +["218"] 1 +["219"] 2 +["221"] 2 +["222"] 1 +["223"] 2 +["224"] 2 +["226"] 1 +["228"] 1 +["229"] 2 +["230"] 5 +["233"] 2 +["235"] 1 +["237"] 2 +["238"] 2 +["239"] 2 +["24"] 2 +["241"] 1 +["242"] 2 +["244"] 1 +["247"] 1 +["248"] 1 +["249"] 1 +["252"] 1 +["255"] 2 +["256"] 2 +["257"] 1 +["258"] 1 +["26"] 2 +["260"] 1 +["262"] 1 +["263"] 1 +["265"] 2 +["266"] 1 +["27"] 1 +["272"] 2 +["273"] 3 +["274"] 1 +["275"] 1 +["277"] 4 +["278"] 2 +["28"] 1 +["280"] 2 +["281"] 2 +["282"] 2 +["283"] 1 +["284"] 1 +["285"] 1 +["286"] 1 +["287"] 1 +["288"] 2 +["289"] 1 +["291"] 1 +["292"] 1 +["296"] 1 +["298"] 3 +["30"] 1 +["302"] 1 +["305"] 1 +["306"] 1 +["307"] 2 +["308"] 1 +["309"] 2 +["310"] 1 +["311"] 3 +["315"] 1 +["316"] 3 +["317"] 2 +["318"] 3 +["321"] 2 +["322"] 2 +["323"] 1 +["325"] 2 +["327"] 3 +["33"] 1 +["331"] 2 +["332"] 1 +["333"] 2 +["335"] 1 +["336"] 1 +["338"] 1 +["339"] 1 +["34"] 1 +["341"] 1 +["342"] 2 +["344"] 2 +["345"] 1 +["348"] 5 +["35"] 3 +["351"] 1 +["353"] 2 +["356"] 1 +["360"] 1 +["362"] 1 +["364"] 1 +["365"] 1 +["366"] 1 +["367"] 2 +["368"] 1 +["369"] 3 +["37"] 2 +["373"] 1 +["374"] 1 +["375"] 1 +["377"] 1 +["378"] 1 +["379"] 1 +["382"] 2 +["384"] 3 +["386"] 1 +["389"] 1 +["392"] 1 +["393"] 1 +["394"] 1 +["395"] 2 +["396"] 3 +["397"] 2 +["399"] 2 +["4"] 1 +["400"] 1 +["401"] 5 +["402"] 1 +["403"] 3 +["404"] 2 +["406"] 4 +["407"] 1 +["409"] 3 +["41"] 1 +["411"] 1 +["413"] 2 +["414"] 2 +["417"] 3 +["418"] 1 +["419"] 1 +["42"] 2 +["421"] 1 +["424"] 2 +["427"] 1 +["429"] 2 +["43"] 1 +["430"] 3 +["431"] 3 +["432"] 1 +["435"] 1 +["436"] 1 +["437"] 1 +["438"] 3 +["439"] 2 +["44"] 1 +["443"] 1 +["444"] 1 +["446"] 1 +["448"] 1 +["449"] 1 +["452"] 1 +["453"] 1 +["454"] 3 +["455"] 1 +["457"] 1 +["458"] 2 +["459"] 2 +["460"] 1 +["462"] 2 +["463"] 2 +["466"] 3 +["467"] 1 +["468"] 4 +["469"] 5 +["47"] 1 +["470"] 1 +["472"] 1 +["475"] 1 +["477"] 1 +["478"] 2 +["479"] 1 +["480"] 3 +["481"] 1 +["482"] 1 +["483"] 1 +["484"] 1 +["485"] 1 +["487"] 1 +["489"] 4 +["490"] 1 +["491"] 1 +["492"] 2 +["493"] 1 +["494"] 1 +["495"] 1 +["496"] 1 +["497"] 1 +["498"] 3 +["5"] 3 +["51"] 2 +["53"] 1 +["54"] 1 +["57"] 1 +["58"] 2 +["64"] 1 +["65"] 1 +["66"] 1 +["67"] 2 +["69"] 1 +["70"] 3 +["72"] 2 +["74"] 1 +["76"] 2 +["77"] 1 +["78"] 1 +["8"] 1 +["80"] 1 +["82"] 1 +["83"] 2 +["84"] 2 +["85"] 1 +["86"] 1 +["87"] 1 +["9"] 1 +["90"] 3 +["92"] 1 +["95"] 2 +["96"] 1 +["97"] 2 +["98"] 2 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@dest2 +#### A masked pattern was here #### +{"0":"val_0"} 3 +{"10":"val_10"} 1 +{"100":"val_100"} 2 +{"103":"val_103"} 2 +{"104":"val_104"} 2 +{"105":"val_105"} 1 +{"11":"val_11"} 1 +{"111":"val_111"} 1 +{"113":"val_113"} 2 +{"114":"val_114"} 1 +{"116":"val_116"} 1 +{"118":"val_118"} 2 +{"119":"val_119"} 3 +{"12":"val_12"} 2 +{"120":"val_120"} 2 +{"125":"val_125"} 2 +{"126":"val_126"} 1 +{"128":"val_128"} 3 +{"129":"val_129"} 2 +{"131":"val_131"} 1 +{"133":"val_133"} 1 +{"134":"val_134"} 2 +{"136":"val_136"} 1 +{"137":"val_137"} 2 +{"138":"val_138"} 4 +{"143":"val_143"} 1 +{"145":"val_145"} 1 +{"146":"val_146"} 2 +{"149":"val_149"} 2 +{"15":"val_15"} 2 +{"150":"val_150"} 1 +{"152":"val_152"} 2 +{"153":"val_153"} 1 +{"155":"val_155"} 1 +{"156":"val_156"} 1 +{"157":"val_157"} 1 +{"158":"val_158"} 1 +{"160":"val_160"} 1 +{"162":"val_162"} 1 +{"163":"val_163"} 1 +{"164":"val_164"} 2 +{"165":"val_165"} 2 +{"166":"val_166"} 1 +{"167":"val_167"} 3 +{"168":"val_168"} 1 +{"169":"val_169"} 4 +{"17":"val_17"} 1 +{"170":"val_170"} 1 +{"172":"val_172"} 2 +{"174":"val_174"} 2 +{"175":"val_175"} 2 +{"176":"val_176"} 2 +{"177":"val_177"} 1 +{"178":"val_178"} 1 +{"179":"val_179"} 2 +{"18":"val_18"} 2 +{"180":"val_180"} 1 +{"181":"val_181"} 1 +{"183":"val_183"} 1 +{"186":"val_186"} 1 +{"187":"val_187"} 3 +{"189":"val_189"} 1 +{"19":"val_19"} 1 +{"190":"val_190"} 1 +{"191":"val_191"} 2 +{"192":"val_192"} 1 +{"193":"val_193"} 3 +{"194":"val_194"} 1 +{"195":"val_195"} 2 +{"196":"val_196"} 1 +{"197":"val_197"} 2 +{"199":"val_199"} 3 +{"2":"val_2"} 1 +{"20":"val_20"} 1 +{"200":"val_200"} 2 +{"201":"val_201"} 1 +{"202":"val_202"} 1 +{"203":"val_203"} 2 +{"205":"val_205"} 2 +{"207":"val_207"} 2 +{"208":"val_208"} 3 +{"209":"val_209"} 2 +{"213":"val_213"} 2 +{"214":"val_214"} 1 +{"216":"val_216"} 2 +{"217":"val_217"} 2 +{"218":"val_218"} 1 +{"219":"val_219"} 2 +{"221":"val_221"} 2 +{"222":"val_222"} 1 +{"223":"val_223"} 2 +{"224":"val_224"} 2 +{"226":"val_226"} 1 +{"228":"val_228"} 1 +{"229":"val_229"} 2 +{"230":"val_230"} 5 +{"233":"val_233"} 2 +{"235":"val_235"} 1 +{"237":"val_237"} 2 +{"238":"val_238"} 2 +{"239":"val_239"} 2 +{"24":"val_24"} 2 +{"241":"val_241"} 1 +{"242":"val_242"} 2 +{"244":"val_244"} 1 +{"247":"val_247"} 1 +{"248":"val_248"} 1 +{"249":"val_249"} 1 +{"252":"val_252"} 1 +{"255":"val_255"} 2 +{"256":"val_256"} 2 +{"257":"val_257"} 1 +{"258":"val_258"} 1 +{"26":"val_26"} 2 +{"260":"val_260"} 1 +{"262":"val_262"} 1 +{"263":"val_263"} 1 +{"265":"val_265"} 2 +{"266":"val_266"} 1 +{"27":"val_27"} 1 +{"272":"val_272"} 2 +{"273":"val_273"} 3 +{"274":"val_274"} 1 +{"275":"val_275"} 1 +{"277":"val_277"} 4 +{"278":"val_278"} 2 +{"28":"val_28"} 1 +{"280":"val_280"} 2 +{"281":"val_281"} 2 +{"282":"val_282"} 2 +{"283":"val_283"} 1 +{"284":"val_284"} 1 +{"285":"val_285"} 1 +{"286":"val_286"} 1 +{"287":"val_287"} 1 +{"288":"val_288"} 2 +{"289":"val_289"} 1 +{"291":"val_291"} 1 +{"292":"val_292"} 1 +{"296":"val_296"} 1 +{"298":"val_298"} 3 +{"30":"val_30"} 1 +{"302":"val_302"} 1 +{"305":"val_305"} 1 +{"306":"val_306"} 1 +{"307":"val_307"} 2 +{"308":"val_308"} 1 +{"309":"val_309"} 2 +{"310":"val_310"} 1 +{"311":"val_311"} 3 +{"315":"val_315"} 1 +{"316":"val_316"} 3 +{"317":"val_317"} 2 +{"318":"val_318"} 3 +{"321":"val_321"} 2 +{"322":"val_322"} 2 +{"323":"val_323"} 1 +{"325":"val_325"} 2 +{"327":"val_327"} 3 +{"33":"val_33"} 1 +{"331":"val_331"} 2 +{"332":"val_332"} 1 +{"333":"val_333"} 2 +{"335":"val_335"} 1 +{"336":"val_336"} 1 +{"338":"val_338"} 1 +{"339":"val_339"} 1 +{"34":"val_34"} 1 
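These map keys are the GROUP BY keys themselves: Stage-3 above shuffles on the whole map(key:value) object over three reduce edges off a single scan of src, one edge per destination table, and the text output serializes each complex key in a JSON-like form (arrays as ["k"], maps as {"k":"v"}, structs as {"col1":"...","col2":"..."}). Because key and value correspond one to one in src, each count equals the plain per-key row count; {"230":"val_230"} 5 above, for instance, mirrors key 230 occurring five times. A minimal sketch of the same pattern against an arbitrary table (the table and column names here are hypothetical, for illustration only):

SELECT MAP(k, v) AS grp_key, COUNT(1) AS cnt
FROM some_table
GROUP BY MAP(k, v);

The remaining DEST2 rows follow.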
+{"341":"val_341"} 1 +{"342":"val_342"} 2 +{"344":"val_344"} 2 +{"345":"val_345"} 1 +{"348":"val_348"} 5 +{"35":"val_35"} 3 +{"351":"val_351"} 1 +{"353":"val_353"} 2 +{"356":"val_356"} 1 +{"360":"val_360"} 1 +{"362":"val_362"} 1 +{"364":"val_364"} 1 +{"365":"val_365"} 1 +{"366":"val_366"} 1 +{"367":"val_367"} 2 +{"368":"val_368"} 1 +{"369":"val_369"} 3 +{"37":"val_37"} 2 +{"373":"val_373"} 1 +{"374":"val_374"} 1 +{"375":"val_375"} 1 +{"377":"val_377"} 1 +{"378":"val_378"} 1 +{"379":"val_379"} 1 +{"382":"val_382"} 2 +{"384":"val_384"} 3 +{"386":"val_386"} 1 +{"389":"val_389"} 1 +{"392":"val_392"} 1 +{"393":"val_393"} 1 +{"394":"val_394"} 1 +{"395":"val_395"} 2 +{"396":"val_396"} 3 +{"397":"val_397"} 2 +{"399":"val_399"} 2 +{"4":"val_4"} 1 +{"400":"val_400"} 1 +{"401":"val_401"} 5 +{"402":"val_402"} 1 +{"403":"val_403"} 3 +{"404":"val_404"} 2 +{"406":"val_406"} 4 +{"407":"val_407"} 1 +{"409":"val_409"} 3 +{"41":"val_41"} 1 +{"411":"val_411"} 1 +{"413":"val_413"} 2 +{"414":"val_414"} 2 +{"417":"val_417"} 3 +{"418":"val_418"} 1 +{"419":"val_419"} 1 +{"42":"val_42"} 2 +{"421":"val_421"} 1 +{"424":"val_424"} 2 +{"427":"val_427"} 1 +{"429":"val_429"} 2 +{"43":"val_43"} 1 +{"430":"val_430"} 3 +{"431":"val_431"} 3 +{"432":"val_432"} 1 +{"435":"val_435"} 1 +{"436":"val_436"} 1 +{"437":"val_437"} 1 +{"438":"val_438"} 3 +{"439":"val_439"} 2 +{"44":"val_44"} 1 +{"443":"val_443"} 1 +{"444":"val_444"} 1 +{"446":"val_446"} 1 +{"448":"val_448"} 1 +{"449":"val_449"} 1 +{"452":"val_452"} 1 +{"453":"val_453"} 1 +{"454":"val_454"} 3 +{"455":"val_455"} 1 +{"457":"val_457"} 1 +{"458":"val_458"} 2 +{"459":"val_459"} 2 +{"460":"val_460"} 1 +{"462":"val_462"} 2 +{"463":"val_463"} 2 +{"466":"val_466"} 3 +{"467":"val_467"} 1 +{"468":"val_468"} 4 +{"469":"val_469"} 5 +{"47":"val_47"} 1 +{"470":"val_470"} 1 +{"472":"val_472"} 1 +{"475":"val_475"} 1 +{"477":"val_477"} 1 +{"478":"val_478"} 2 +{"479":"val_479"} 1 +{"480":"val_480"} 3 +{"481":"val_481"} 1 +{"482":"val_482"} 1 +{"483":"val_483"} 1 +{"484":"val_484"} 1 +{"485":"val_485"} 1 +{"487":"val_487"} 1 +{"489":"val_489"} 4 +{"490":"val_490"} 1 +{"491":"val_491"} 1 +{"492":"val_492"} 2 +{"493":"val_493"} 1 +{"494":"val_494"} 1 +{"495":"val_495"} 1 +{"496":"val_496"} 1 +{"497":"val_497"} 1 +{"498":"val_498"} 3 +{"5":"val_5"} 3 +{"51":"val_51"} 2 +{"53":"val_53"} 1 +{"54":"val_54"} 1 +{"57":"val_57"} 1 +{"58":"val_58"} 2 +{"64":"val_64"} 1 +{"65":"val_65"} 1 +{"66":"val_66"} 1 +{"67":"val_67"} 2 +{"69":"val_69"} 1 +{"70":"val_70"} 3 +{"72":"val_72"} 2 +{"74":"val_74"} 1 +{"76":"val_76"} 2 +{"77":"val_77"} 1 +{"78":"val_78"} 1 +{"8":"val_8"} 1 +{"80":"val_80"} 1 +{"82":"val_82"} 1 +{"83":"val_83"} 2 +{"84":"val_84"} 2 +{"85":"val_85"} 1 +{"86":"val_86"} 1 +{"87":"val_87"} 1 +{"9":"val_9"} 1 +{"90":"val_90"} 3 +{"92":"val_92"} 1 +{"95":"val_95"} 2 +{"96":"val_96"} 1 +{"97":"val_97"} 2 +{"98":"val_98"} 2 +PREHOOK: query: SELECT DEST3.* FROM DEST3 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST3.* FROM DEST3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest3 +#### A masked pattern was here #### +{"col1":"0","col2":"val_0"} 3 +{"col1":"10","col2":"val_10"} 1 +{"col1":"100","col2":"val_100"} 2 +{"col1":"103","col2":"val_103"} 2 +{"col1":"104","col2":"val_104"} 2 +{"col1":"105","col2":"val_105"} 1 +{"col1":"11","col2":"val_11"} 1 +{"col1":"111","col2":"val_111"} 1 +{"col1":"113","col2":"val_113"} 2 +{"col1":"114","col2":"val_114"} 1 +{"col1":"116","col2":"val_116"} 1 +{"col1":"118","col2":"val_118"} 2 
+{"col1":"119","col2":"val_119"} 3 +{"col1":"12","col2":"val_12"} 2 +{"col1":"120","col2":"val_120"} 2 +{"col1":"125","col2":"val_125"} 2 +{"col1":"126","col2":"val_126"} 1 +{"col1":"128","col2":"val_128"} 3 +{"col1":"129","col2":"val_129"} 2 +{"col1":"131","col2":"val_131"} 1 +{"col1":"133","col2":"val_133"} 1 +{"col1":"134","col2":"val_134"} 2 +{"col1":"136","col2":"val_136"} 1 +{"col1":"137","col2":"val_137"} 2 +{"col1":"138","col2":"val_138"} 4 +{"col1":"143","col2":"val_143"} 1 +{"col1":"145","col2":"val_145"} 1 +{"col1":"146","col2":"val_146"} 2 +{"col1":"149","col2":"val_149"} 2 +{"col1":"15","col2":"val_15"} 2 +{"col1":"150","col2":"val_150"} 1 +{"col1":"152","col2":"val_152"} 2 +{"col1":"153","col2":"val_153"} 1 +{"col1":"155","col2":"val_155"} 1 +{"col1":"156","col2":"val_156"} 1 +{"col1":"157","col2":"val_157"} 1 +{"col1":"158","col2":"val_158"} 1 +{"col1":"160","col2":"val_160"} 1 +{"col1":"162","col2":"val_162"} 1 +{"col1":"163","col2":"val_163"} 1 +{"col1":"164","col2":"val_164"} 2 +{"col1":"165","col2":"val_165"} 2 +{"col1":"166","col2":"val_166"} 1 +{"col1":"167","col2":"val_167"} 3 +{"col1":"168","col2":"val_168"} 1 +{"col1":"169","col2":"val_169"} 4 +{"col1":"17","col2":"val_17"} 1 +{"col1":"170","col2":"val_170"} 1 +{"col1":"172","col2":"val_172"} 2 +{"col1":"174","col2":"val_174"} 2 +{"col1":"175","col2":"val_175"} 2 +{"col1":"176","col2":"val_176"} 2 +{"col1":"177","col2":"val_177"} 1 +{"col1":"178","col2":"val_178"} 1 +{"col1":"179","col2":"val_179"} 2 +{"col1":"18","col2":"val_18"} 2 +{"col1":"180","col2":"val_180"} 1 +{"col1":"181","col2":"val_181"} 1 +{"col1":"183","col2":"val_183"} 1 +{"col1":"186","col2":"val_186"} 1 +{"col1":"187","col2":"val_187"} 3 +{"col1":"189","col2":"val_189"} 1 +{"col1":"19","col2":"val_19"} 1 +{"col1":"190","col2":"val_190"} 1 +{"col1":"191","col2":"val_191"} 2 +{"col1":"192","col2":"val_192"} 1 +{"col1":"193","col2":"val_193"} 3 +{"col1":"194","col2":"val_194"} 1 +{"col1":"195","col2":"val_195"} 2 +{"col1":"196","col2":"val_196"} 1 +{"col1":"197","col2":"val_197"} 2 +{"col1":"199","col2":"val_199"} 3 +{"col1":"2","col2":"val_2"} 1 +{"col1":"20","col2":"val_20"} 1 +{"col1":"200","col2":"val_200"} 2 +{"col1":"201","col2":"val_201"} 1 +{"col1":"202","col2":"val_202"} 1 +{"col1":"203","col2":"val_203"} 2 +{"col1":"205","col2":"val_205"} 2 +{"col1":"207","col2":"val_207"} 2 +{"col1":"208","col2":"val_208"} 3 +{"col1":"209","col2":"val_209"} 2 +{"col1":"213","col2":"val_213"} 2 +{"col1":"214","col2":"val_214"} 1 +{"col1":"216","col2":"val_216"} 2 +{"col1":"217","col2":"val_217"} 2 +{"col1":"218","col2":"val_218"} 1 +{"col1":"219","col2":"val_219"} 2 +{"col1":"221","col2":"val_221"} 2 +{"col1":"222","col2":"val_222"} 1 +{"col1":"223","col2":"val_223"} 2 +{"col1":"224","col2":"val_224"} 2 +{"col1":"226","col2":"val_226"} 1 +{"col1":"228","col2":"val_228"} 1 +{"col1":"229","col2":"val_229"} 2 +{"col1":"230","col2":"val_230"} 5 +{"col1":"233","col2":"val_233"} 2 +{"col1":"235","col2":"val_235"} 1 +{"col1":"237","col2":"val_237"} 2 +{"col1":"238","col2":"val_238"} 2 +{"col1":"239","col2":"val_239"} 2 +{"col1":"24","col2":"val_24"} 2 +{"col1":"241","col2":"val_241"} 1 +{"col1":"242","col2":"val_242"} 2 +{"col1":"244","col2":"val_244"} 1 +{"col1":"247","col2":"val_247"} 1 +{"col1":"248","col2":"val_248"} 1 +{"col1":"249","col2":"val_249"} 1 +{"col1":"252","col2":"val_252"} 1 +{"col1":"255","col2":"val_255"} 2 +{"col1":"256","col2":"val_256"} 2 +{"col1":"257","col2":"val_257"} 1 +{"col1":"258","col2":"val_258"} 1 +{"col1":"26","col2":"val_26"} 2 
+{"col1":"260","col2":"val_260"} 1 +{"col1":"262","col2":"val_262"} 1 +{"col1":"263","col2":"val_263"} 1 +{"col1":"265","col2":"val_265"} 2 +{"col1":"266","col2":"val_266"} 1 +{"col1":"27","col2":"val_27"} 1 +{"col1":"272","col2":"val_272"} 2 +{"col1":"273","col2":"val_273"} 3 +{"col1":"274","col2":"val_274"} 1 +{"col1":"275","col2":"val_275"} 1 +{"col1":"277","col2":"val_277"} 4 +{"col1":"278","col2":"val_278"} 2 +{"col1":"28","col2":"val_28"} 1 +{"col1":"280","col2":"val_280"} 2 +{"col1":"281","col2":"val_281"} 2 +{"col1":"282","col2":"val_282"} 2 +{"col1":"283","col2":"val_283"} 1 +{"col1":"284","col2":"val_284"} 1 +{"col1":"285","col2":"val_285"} 1 +{"col1":"286","col2":"val_286"} 1 +{"col1":"287","col2":"val_287"} 1 +{"col1":"288","col2":"val_288"} 2 +{"col1":"289","col2":"val_289"} 1 +{"col1":"291","col2":"val_291"} 1 +{"col1":"292","col2":"val_292"} 1 +{"col1":"296","col2":"val_296"} 1 +{"col1":"298","col2":"val_298"} 3 +{"col1":"30","col2":"val_30"} 1 +{"col1":"302","col2":"val_302"} 1 +{"col1":"305","col2":"val_305"} 1 +{"col1":"306","col2":"val_306"} 1 +{"col1":"307","col2":"val_307"} 2 +{"col1":"308","col2":"val_308"} 1 +{"col1":"309","col2":"val_309"} 2 +{"col1":"310","col2":"val_310"} 1 +{"col1":"311","col2":"val_311"} 3 +{"col1":"315","col2":"val_315"} 1 +{"col1":"316","col2":"val_316"} 3 +{"col1":"317","col2":"val_317"} 2 +{"col1":"318","col2":"val_318"} 3 +{"col1":"321","col2":"val_321"} 2 +{"col1":"322","col2":"val_322"} 2 +{"col1":"323","col2":"val_323"} 1 +{"col1":"325","col2":"val_325"} 2 +{"col1":"327","col2":"val_327"} 3 +{"col1":"33","col2":"val_33"} 1 +{"col1":"331","col2":"val_331"} 2 +{"col1":"332","col2":"val_332"} 1 +{"col1":"333","col2":"val_333"} 2 +{"col1":"335","col2":"val_335"} 1 +{"col1":"336","col2":"val_336"} 1 +{"col1":"338","col2":"val_338"} 1 +{"col1":"339","col2":"val_339"} 1 +{"col1":"34","col2":"val_34"} 1 +{"col1":"341","col2":"val_341"} 1 +{"col1":"342","col2":"val_342"} 2 +{"col1":"344","col2":"val_344"} 2 +{"col1":"345","col2":"val_345"} 1 +{"col1":"348","col2":"val_348"} 5 +{"col1":"35","col2":"val_35"} 3 +{"col1":"351","col2":"val_351"} 1 +{"col1":"353","col2":"val_353"} 2 +{"col1":"356","col2":"val_356"} 1 +{"col1":"360","col2":"val_360"} 1 +{"col1":"362","col2":"val_362"} 1 +{"col1":"364","col2":"val_364"} 1 +{"col1":"365","col2":"val_365"} 1 +{"col1":"366","col2":"val_366"} 1 +{"col1":"367","col2":"val_367"} 2 +{"col1":"368","col2":"val_368"} 1 +{"col1":"369","col2":"val_369"} 3 +{"col1":"37","col2":"val_37"} 2 +{"col1":"373","col2":"val_373"} 1 +{"col1":"374","col2":"val_374"} 1 +{"col1":"375","col2":"val_375"} 1 +{"col1":"377","col2":"val_377"} 1 +{"col1":"378","col2":"val_378"} 1 +{"col1":"379","col2":"val_379"} 1 +{"col1":"382","col2":"val_382"} 2 +{"col1":"384","col2":"val_384"} 3 +{"col1":"386","col2":"val_386"} 1 +{"col1":"389","col2":"val_389"} 1 +{"col1":"392","col2":"val_392"} 1 +{"col1":"393","col2":"val_393"} 1 +{"col1":"394","col2":"val_394"} 1 +{"col1":"395","col2":"val_395"} 2 +{"col1":"396","col2":"val_396"} 3 +{"col1":"397","col2":"val_397"} 2 +{"col1":"399","col2":"val_399"} 2 +{"col1":"4","col2":"val_4"} 1 +{"col1":"400","col2":"val_400"} 1 +{"col1":"401","col2":"val_401"} 5 +{"col1":"402","col2":"val_402"} 1 +{"col1":"403","col2":"val_403"} 3 +{"col1":"404","col2":"val_404"} 2 +{"col1":"406","col2":"val_406"} 4 +{"col1":"407","col2":"val_407"} 1 +{"col1":"409","col2":"val_409"} 3 +{"col1":"41","col2":"val_41"} 1 +{"col1":"411","col2":"val_411"} 1 +{"col1":"413","col2":"val_413"} 2 +{"col1":"414","col2":"val_414"} 2 
+{"col1":"417","col2":"val_417"} 3 +{"col1":"418","col2":"val_418"} 1 +{"col1":"419","col2":"val_419"} 1 +{"col1":"42","col2":"val_42"} 2 +{"col1":"421","col2":"val_421"} 1 +{"col1":"424","col2":"val_424"} 2 +{"col1":"427","col2":"val_427"} 1 +{"col1":"429","col2":"val_429"} 2 +{"col1":"43","col2":"val_43"} 1 +{"col1":"430","col2":"val_430"} 3 +{"col1":"431","col2":"val_431"} 3 +{"col1":"432","col2":"val_432"} 1 +{"col1":"435","col2":"val_435"} 1 +{"col1":"436","col2":"val_436"} 1 +{"col1":"437","col2":"val_437"} 1 +{"col1":"438","col2":"val_438"} 3 +{"col1":"439","col2":"val_439"} 2 +{"col1":"44","col2":"val_44"} 1 +{"col1":"443","col2":"val_443"} 1 +{"col1":"444","col2":"val_444"} 1 +{"col1":"446","col2":"val_446"} 1 +{"col1":"448","col2":"val_448"} 1 +{"col1":"449","col2":"val_449"} 1 +{"col1":"452","col2":"val_452"} 1 +{"col1":"453","col2":"val_453"} 1 +{"col1":"454","col2":"val_454"} 3 +{"col1":"455","col2":"val_455"} 1 +{"col1":"457","col2":"val_457"} 1 +{"col1":"458","col2":"val_458"} 2 +{"col1":"459","col2":"val_459"} 2 +{"col1":"460","col2":"val_460"} 1 +{"col1":"462","col2":"val_462"} 2 +{"col1":"463","col2":"val_463"} 2 +{"col1":"466","col2":"val_466"} 3 +{"col1":"467","col2":"val_467"} 1 +{"col1":"468","col2":"val_468"} 4 +{"col1":"469","col2":"val_469"} 5 +{"col1":"47","col2":"val_47"} 1 +{"col1":"470","col2":"val_470"} 1 +{"col1":"472","col2":"val_472"} 1 +{"col1":"475","col2":"val_475"} 1 +{"col1":"477","col2":"val_477"} 1 +{"col1":"478","col2":"val_478"} 2 +{"col1":"479","col2":"val_479"} 1 +{"col1":"480","col2":"val_480"} 3 +{"col1":"481","col2":"val_481"} 1 +{"col1":"482","col2":"val_482"} 1 +{"col1":"483","col2":"val_483"} 1 +{"col1":"484","col2":"val_484"} 1 +{"col1":"485","col2":"val_485"} 1 +{"col1":"487","col2":"val_487"} 1 +{"col1":"489","col2":"val_489"} 4 +{"col1":"490","col2":"val_490"} 1 +{"col1":"491","col2":"val_491"} 1 +{"col1":"492","col2":"val_492"} 2 +{"col1":"493","col2":"val_493"} 1 +{"col1":"494","col2":"val_494"} 1 +{"col1":"495","col2":"val_495"} 1 +{"col1":"496","col2":"val_496"} 1 +{"col1":"497","col2":"val_497"} 1 +{"col1":"498","col2":"val_498"} 3 +{"col1":"5","col2":"val_5"} 3 +{"col1":"51","col2":"val_51"} 2 +{"col1":"53","col2":"val_53"} 1 +{"col1":"54","col2":"val_54"} 1 +{"col1":"57","col2":"val_57"} 1 +{"col1":"58","col2":"val_58"} 2 +{"col1":"64","col2":"val_64"} 1 +{"col1":"65","col2":"val_65"} 1 +{"col1":"66","col2":"val_66"} 1 +{"col1":"67","col2":"val_67"} 2 +{"col1":"69","col2":"val_69"} 1 +{"col1":"70","col2":"val_70"} 3 +{"col1":"72","col2":"val_72"} 2 +{"col1":"74","col2":"val_74"} 1 +{"col1":"76","col2":"val_76"} 2 +{"col1":"77","col2":"val_77"} 1 +{"col1":"78","col2":"val_78"} 1 +{"col1":"8","col2":"val_8"} 1 +{"col1":"80","col2":"val_80"} 1 +{"col1":"82","col2":"val_82"} 1 +{"col1":"83","col2":"val_83"} 2 +{"col1":"84","col2":"val_84"} 2 +{"col1":"85","col2":"val_85"} 1 +{"col1":"86","col2":"val_86"} 1 +{"col1":"87","col2":"val_87"} 1 +{"col1":"9","col2":"val_9"} 1 +{"col1":"90","col2":"val_90"} 3 +{"col1":"92","col2":"val_92"} 1 +{"col1":"95","col2":"val_95"} 2 +{"col1":"96","col2":"val_96"} 1 +{"col1":"97","col2":"val_97"} 2 +{"col1":"98","col2":"val_98"} 2 diff --git ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out new file mode 100644 index 0000000..7470843 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby_complex_types_multi_single_reducer.q.out @@ -0,0 +1,243 @@ +PREHOOK: query: 
-- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key ARRAY<STRING>, value BIGINT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE DEST1(key ARRAY<STRING>, value BIGINT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST1 +PREHOOK: query: CREATE TABLE DEST2(key MAP<STRING,STRING>, value BIGINT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@DEST2 +POSTHOOK: query: CREATE TABLE DEST2(key MAP<STRING,STRING>, value BIGINT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@DEST2 +PREHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Map 1 (GROUP, 1) + Reducer 5 <- Reducer 4 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: array(key) (type: array<string>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: array<string>) + sort order: + + Map-reduce partition columns: _col0 (type: array<string>) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: map(key:value) (type: map<string,string>) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: map<string,string>) + sort order: + + Map-reduce partition columns: _col0 (type: map<string,string>) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: array<string>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: array<string>), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num
rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: array<string>), _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: array<string>), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: map<string,string>) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: map<string,string>), _col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: map<string,string>), _col1 (type: bigint) + Reducer 5 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: map<string,string>), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output:
default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM SRC +INSERT OVERWRITE TABLE DEST1 SELECT ARRAY(SRC.key), COUNT(1) GROUP BY ARRAY(SRC.key) limit 10 +INSERT OVERWRITE TABLE DEST2 SELECT MAP(SRC.key, SRC.value), COUNT(1) GROUP BY MAP(SRC.key, SRC.value) limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.value EXPRESSION [(src)src.null, ] +PREHOOK: query: SELECT DEST1.* FROM DEST1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST1.* FROM DEST1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +["105"] 1 +["145"] 1 +["181"] 1 +["195"] 2 +["244"] 1 +["335"] 1 +["336"] 1 +["4"] 1 +["421"] 1 +["83"] 2 +PREHOOK: query: SELECT DEST2.* FROM DEST2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT DEST2.* FROM DEST2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +{"156":"val_156"} 1 +{"170":"val_170"} 1 +{"186":"val_186"} 1 +{"274":"val_274"} 1 +{"332":"val_332"} 1 +{"402":"val_402"} 1 +{"480":"val_480"} 3 +{"5":"val_5"} 3 +{"67":"val_67"} 2 +{"96":"val_96"} 1 diff --git ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out new file mode 100644 index 0000000..d3457da --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby_multi_insert_common_distinct.q.out @@ -0,0 +1,385 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +create table dest1(key int, cnt int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +create table dest1(key int, cnt int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: create table dest2(key int, cnt int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest2 +POSTHOOK: query: create table dest2(key int, cnt int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest2 +PREHOOK: query: explain +from src +insert overwrite table dest1 select key, count(distinct value) group by key +insert overwrite table dest2 select key+key, count(distinct value) group by key+key +PREHOOK: type: QUERY +POSTHOOK: query: explain +from src +insert overwrite table dest1 select key, count(distinct value) group by key +insert overwrite table dest2 select key+key, count(distinct value) group by key+key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 
+ Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) + Reducer 4 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: key (type: string), (key + key) (type: double) + Reducer 2 + Reduce Operator Tree: + Forward + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Group By Operator + aggregations: count(DISTINCT KEY._col0) + keys: VALUE._col1 (type: double) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 4 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: double) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + 
tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from src +insert overwrite table dest1 select key, count(distinct value) group by key +insert overwrite table dest2 select key+key, count(distinct value) group by key+key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from src +insert overwrite table dest1 select key, count(distinct value) group by key +insert overwrite table dest2 select key+key, count(distinct value) group by key+key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from dest1 where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +2 1 +4 1 +5 1 +8 1 +9 1 +PREHOOK: query: select * from dest2 where key < 20 order by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 where key < 20 order by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 1 +10 1 +16 1 +18 1 +4 1 +8 1 +PREHOOK: query: -- no need to spray by distinct key first +explain +from src +insert overwrite table dest1 select key, count(distinct value) group by key +insert overwrite table dest2 select key+key, count(distinct value) group by key+key +PREHOOK: type: QUERY +POSTHOOK: query: -- no need to spray by distinct key first +explain +from src +insert overwrite table dest1 select key, count(distinct value) group by key +insert overwrite table dest2 select key+key, count(distinct value) group by key+key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) + Reducer 3 <- Map 1 
(GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value) + keys: key (type: string), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: key, value + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(DISTINCT value) + keys: (key + key) (type: double), value (type: string) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double), _col1 (type: string) + sort order: ++ + Map-reduce partition columns: _col0 (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0) + keys: KEY._col0 (type: double) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), UDFToInteger(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-4 + Stats-Aggr Operator + + Stage: Stage-1 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest2 + + Stage: Stage-5 + Stats-Aggr Operator + +PREHOOK: query: from src +insert overwrite table dest1 select key, count(distinct value) group by key +insert overwrite table dest2 select key+key, count(distinct value) group by key+key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +PREHOOK: Output: default@dest2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from src +insert overwrite table dest1 select key, count(distinct value) group by key +insert overwrite table dest2 select key+key, count(distinct value) group by key+key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Output: default@dest2 +POSTHOOK: Lineage: dest1.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.cnt EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: select * from dest1 where key < 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest1 where key < 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 +2 1 +4 1 +5 1 +8 1 +9 1 +PREHOOK: query: select * from dest2 where key < 20 order by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest2 +#### A masked pattern was here #### +POSTHOOK: query: select * from dest2 where key < 20 order by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest2 +#### A masked pattern was here #### +0 1 +10 1 +16 1 +18 1 +4 1 +8 1 diff --git ql/src/test/results/clientpositive/spark/pcr.q.out ql/src/test/results/clientpositive/spark/pcr.q.out new file mode 100644 index 0000000..4e9244f --- /dev/null +++ ql/src/test/results/clientpositive/spark/pcr.q.out @@ -0,0 +1,5470 @@ +PREHOOK: query: drop table pcr_t1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcr_t1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table pcr_t2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcr_t2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table pcr_t3 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table pcr_t3 +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: create table pcr_t1 (key int, value string) partitioned by (ds string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcr_t1 +PREHOOK: query: insert overwrite table pcr_t1 partition 
(ds='2000-04-08') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds=2000-04-08 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-08') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-08).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds=2000-04-09 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-09') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds=2000-04-09 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-09).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds=2000-04-10 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-10') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds=2000-04-10 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-10).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended select key, value, ds from pcr_t1 where ds<='2000-04-09' and key<5 order by key, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where ds<='2000-04-09' and key<5 order by key, ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + and + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + < + TOK_TABLE_OR_COL + key + 5 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + 
Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key < 5) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + sort order: ++ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcr_t1 where ds<='2000-04-09' and key<5 order by key, ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Input: default@pcr_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcr_t1 where ds<='2000-04-09' and key<5 order by key, ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 +#### A masked pattern was here #### +0 val_0 2000-04-08 +0 val_0 2000-04-08 +0 val_0 2000-04-08 +0 val_0 2000-04-09 +0 val_0 2000-04-09 +0 val_0 2000-04-09 +2 val_2 2000-04-08 +2 val_2 2000-04-09 +4 val_4 2000-04-08 +4 val_4 2000-04-09 +PREHOOK: query: explain extended select key, value from pcr_t1 where ds<='2000-04-09' or key<5 order by key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value from pcr_t1 where ds<='2000-04-09' or key<5 order by key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + or + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + < + TOK_TABLE_OR_COL + key + 5 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((ds <= '2000-04-09') or (key < 5)) (type: boolean) + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 
Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-10 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE 
true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] + /pcr_t1/ds=2000-04-10 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value from pcr_t1 where ds<='2000-04-09' or key<5 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Input: default@pcr_t1@ds=2000-04-09 +PREHOOK: Input: default@pcr_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: select key, value from pcr_t1 where ds<='2000-04-09' or key<5 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 +#### A masked pattern was here #### +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +0 val_0 +2 val_2 +2 val_2 +2 val_2 +4 val_4 +4 val_4 +4 val_4 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +5 val_5 +8 val_8 +8 val_8 +9 val_9 +9 val_9 +10 val_10 +10 val_10 +11 val_11 +11 val_11 +12 val_12 +12 val_12 +12 val_12 +12 val_12 +15 val_15 +15 val_15 +15 val_15 +15 val_15 +17 val_17 +17 val_17 +18 val_18 +18 val_18 +18 val_18 +18 val_18 +19 val_19 +19 val_19 +PREHOOK: query: explain extended select key, value, ds from pcr_t1 where ds<='2000-04-09' and 
key<5 and value != 'val_2' order by key, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where ds<='2000-04-09' and key<5 and value != 'val_2' order by key, ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + and + and + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + < + TOK_TABLE_OR_COL + key + 5 + != + TOK_TABLE_OR_COL + value + 'val_2' + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((key < 5) and (value <> 'val_2')) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col2 (type: string) + sort order: ++ + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 
+ properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcr_t1 where ds<='2000-04-09' and key<5 and value != 'val_2' order by key, ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Input: default@pcr_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcr_t1 where ds<='2000-04-09' and key<5 and value != 'val_2' order by key, ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 +#### A masked pattern was here #### +0 val_0 2000-04-08 +0 val_0 2000-04-08 +0 val_0 2000-04-08 +0 val_0 2000-04-09 +0 val_0 2000-04-09 +0 val_0 2000-04-09 +4 val_4 2000-04-08 +4 val_4 2000-04-09 +PREHOOK: query: explain extended +select key, value, ds from pcr_t1 +where (ds < '2000-04-09' and key < 5) or (ds > '2000-04-09' and value == 'val_5') order by key, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +select key, value, ds from pcr_t1 +where (ds < '2000-04-09' and key < 5) or (ds > '2000-04-09' and value == 'val_5') order by key, ds 
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         or
+            and
+               <
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-09'
+               <
+                  TOK_TABLE_OR_COL
+                     key
+                  5
+            and
+               >
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-09'
+               ==
+                  TOK_TABLE_OR_COL
+                     value
+                  'val_5'
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: pcr_t1
+              Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: (((ds < '2000-04-09') and (key < 5)) or ((ds > '2000-04-09') and (value = 'val_5'))) (type: boolean)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: key (type: int), value (type: string), ds (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col2 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    tag: -1
+                    value expressions: _col1 (type: string)
+                    auto parallelism: false
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-10
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-10
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [pcr_t1]
+            /pcr_t1/ds=2000-04-10 [pcr_t1]
+        Reducer 2 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types int:string:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, ds from pcr_t1
+where (ds < '2000-04-09' and key < 5) or (ds > '2000-04-09' and value == 'val_5') order by key, ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Input: default@pcr_t1@ds=2000-04-10
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, ds from pcr_t1
+where (ds < '2000-04-09' and key < 5) or (ds > '2000-04-09' and value == 'val_5') order by key, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-10
+#### A masked pattern was here ####
+0 val_0 2000-04-08
+0 val_0 2000-04-08
+0 val_0 2000-04-08
+2 val_2 2000-04-08
+4 val_4 2000-04-08
+5 val_5 2000-04-10
+5 val_5 2000-04-10
+5 val_5 2000-04-10
+PREHOOK: query: explain extended
+select key, value, ds from pcr_t1
+where (ds < '2000-04-10' and key < 5) or (ds > '2000-04-08' and value == 'val_5') order by key, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds from pcr_t1
+where (ds < '2000-04-10' and key < 5) or (ds > '2000-04-08' and value == 'val_5') order by key, ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         or
+            and
+               <
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-10'
+               <
+                  TOK_TABLE_OR_COL
+                     key
+                  5
+            and
+               >
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-08'
+               ==
+                  TOK_TABLE_OR_COL
+                     value
+                  'val_5'
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: pcr_t1
+              Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: (((ds < '2000-04-10') and (key < 5)) or ((ds > '2000-04-08') and (value = 'val_5'))) (type: boolean)
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                Select Operator
+                  expressions: key (type: int), value (type: string), ds (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col2 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+                    tag: -1
+                    value expressions: _col1 (type: string)
+                    auto parallelism: false
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-09
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-09
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-10
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-10
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [pcr_t1]
+            /pcr_t1/ds=2000-04-09 [pcr_t1]
+            /pcr_t1/ds=2000-04-10 [pcr_t1]
+        Reducer 2 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types int:string:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, ds from pcr_t1
+where (ds < '2000-04-10' and key < 5) or (ds > '2000-04-08' and value == 'val_5') order by key, ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Input: default@pcr_t1@ds=2000-04-09
+PREHOOK: Input: default@pcr_t1@ds=2000-04-10
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, ds from pcr_t1
+where (ds < '2000-04-10' and key < 5) or (ds > '2000-04-08' and value == 'val_5') order by key, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-09
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-10
+#### A masked pattern was here ####
+0 val_0 2000-04-08
+0 val_0 2000-04-08
+0 val_0 2000-04-08
+0 val_0 2000-04-09
+0 val_0 2000-04-09
+0 val_0 2000-04-09
+2 val_2 2000-04-08
+2 val_2 2000-04-09
+4 val_4 2000-04-08
+4 val_4 2000-04-09
+5 val_5 2000-04-09
+5 val_5 2000-04-09
+5 val_5 2000-04-09
+5 val_5 2000-04-10
+5 val_5 2000-04-10
+5 val_5 2000-04-10
+PREHOOK: query: explain extended
+select key, value, ds from pcr_t1
+where (ds < '2000-04-10' or key < 5) and (ds > '2000-04-08' or value == 'val_5') order by key, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+select key, value, ds from pcr_t1
+where (ds < '2000-04-10' or key < 5) and (ds > '2000-04-08' or value == 'val_5') order by key, ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         and
+            or
+               <
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-10'
+               <
+                  TOK_TABLE_OR_COL
+                     key
+                  5
+            or
+               >
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-08'
+               ==
+                  TOK_TABLE_OR_COL
+                     value
+                  'val_5'
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: pcr_t1
+              Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: (((ds < '2000-04-10') or (key < 5)) and ((ds > '2000-04-08') or (value = 'val_5'))) (type: boolean)
+                Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: key (type: int), value (type: string), ds (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col2 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    value expressions: _col1 (type: string)
+                    auto parallelism: false
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-09
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-09
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-10
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-10
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [pcr_t1]
+            /pcr_t1/ds=2000-04-09 [pcr_t1]
+            /pcr_t1/ds=2000-04-10 [pcr_t1]
+        Reducer 2 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types int:string:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, ds from pcr_t1
+where (ds < '2000-04-10' or key < 5) and (ds > '2000-04-08' or value == 'val_5') order by key, ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Input: default@pcr_t1@ds=2000-04-09
+PREHOOK: Input: default@pcr_t1@ds=2000-04-10
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, ds from pcr_t1
+where (ds < '2000-04-10' or key < 5) and (ds > '2000-04-08' or value == 'val_5') order by key, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-09
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-10
+#### A masked pattern was here ####
+0 val_0 2000-04-09
+0 val_0 2000-04-09
+0 val_0 2000-04-09
+0 val_0 2000-04-10
+0 val_0 2000-04-10
+0 val_0 2000-04-10
+2 val_2 2000-04-09
+2 val_2 2000-04-10
+4 val_4 2000-04-09
+4 val_4 2000-04-10
+5 val_5 2000-04-08
+5 val_5 2000-04-08
+5 val_5 2000-04-08
+5 val_5 2000-04-09
+5 val_5 2000-04-09
+5 val_5 2000-04-09
+8 val_8 2000-04-09
+9 val_9 2000-04-09
+10 val_10 2000-04-09
+11 val_11 2000-04-09
+12 val_12 2000-04-09
+12 val_12 2000-04-09
+15 val_15 2000-04-09
+15 val_15 2000-04-09
+17 val_17 2000-04-09
+18 val_18 2000-04-09
+18 val_18 2000-04-09
+19 val_19 2000-04-09
+PREHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-04-08' or ds='2000-04-09') and key=14 order by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value from pcr_t1 where (ds='2000-04-08' or ds='2000-04-09') and key=14 order by key, value
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+      TOK_WHERE
+         and
+            or
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-09'
+            =
+               TOK_TABLE_OR_COL
+                  key
+               14
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: pcr_t1
+              Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: (key = 14) (type: boolean)
+                Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: value (type: string)
+                  outputColumnNames: _col1
+                  Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: 14 (type: int), _col1 (type: string)
+                    sort order: ++
+                    Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    auto parallelism: false
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-09
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-09
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [pcr_t1]
+            /pcr_t1/ds=2000-04-09 [pcr_t1]
+        Reducer 2 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: 14 (type: int), KEY.reducesinkkey1 (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1
+                    columns.types int:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value from pcr_t1 where (ds='2000-04-08' or ds='2000-04-09') and key=14 order by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Input: default@pcr_t1@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from pcr_t1 where (ds='2000-04-08' or ds='2000-04-09') and key=14 order by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-09
+#### A masked pattern was here ####
+PREHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+      TOK_WHERE
+         or
+            =
+               TOK_TABLE_OR_COL
+                  ds
+               '2000-04-08'
+            =
+               TOK_TABLE_OR_COL
+                  ds
+               '2000-04-09'
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: pcr_t1
+              Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Select Operator
+                expressions: key (type: int), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string)
+                  sort order: ++
+                  Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  auto parallelism: false
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-09
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-09
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [pcr_t1]
+            /pcr_t1/ds=2000-04-09 [pcr_t1]
+        Reducer 2 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1
+                    columns.types int:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Input: default@pcr_t1@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from pcr_t1 where ds='2000-04-08' or ds='2000-04-09' order by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-09
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+2 val_2
+4 val_4
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+8 val_8
+9 val_9
+9 val_9
+10 val_10
+10 val_10
+11 val_11
+11 val_11
+12 val_12
+12 val_12
+12 val_12
+12 val_12
+15 val_15
+15 val_15
+15 val_15
+15 val_15
+17 val_17
+17 val_17
+18 val_18
+18 val_18
+18 val_18
+18 val_18
+19 val_19
+19 val_19
+PREHOOK: query: explain extended select key, value from pcr_t1 where ds>='2000-04-08' or ds<'2000-04-10' order by key, value
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value from pcr_t1 where ds>='2000-04-08' or ds<'2000-04-10' order by key, value
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+      TOK_WHERE
+         or
+            >=
+               TOK_TABLE_OR_COL
+                  ds
+               '2000-04-08'
+            <
+               TOK_TABLE_OR_COL
+                  ds
+               '2000-04-10'
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: pcr_t1
+              Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Select Operator
+                expressions: key (type: int), value (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string)
+                  sort order: ++
+                  Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  auto parallelism: false
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-09
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-09
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-10
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-10
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [pcr_t1]
+            /pcr_t1/ds=2000-04-09 [pcr_t1]
+            /pcr_t1/ds=2000-04-10 [pcr_t1]
+        Reducer 2 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1
+                    columns.types int:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value from pcr_t1 where ds>='2000-04-08' or ds<'2000-04-10' order by key, value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Input: default@pcr_t1@ds=2000-04-09
+PREHOOK: Input: default@pcr_t1@ds=2000-04-10
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value from pcr_t1 where ds>='2000-04-08' or ds<'2000-04-10' order by key, value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-09
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-10
+#### A masked pattern was here ####
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+0 val_0
+2 val_2
+2 val_2
+2 val_2
+4 val_4
+4 val_4
+4 val_4
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+5 val_5
+8 val_8
+8 val_8
+8 val_8
+9 val_9
+9 val_9
+9 val_9
+10 val_10
+10 val_10
+10 val_10
+11 val_11
+11 val_11
+11 val_11
+12 val_12
+12 val_12
+12 val_12
+12 val_12
+12 val_12
+12 val_12
+15 val_15
+15 val_15
+15 val_15
+15 val_15
+15 val_15
+15 val_15
+17 val_17
+17 val_17
+17 val_17
+18 val_18
+18 val_18
+18 val_18
+18 val_18
+18 val_18
+18 val_18
+19 val_19
+19 val_19
+19 val_19
+PREHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_TABREF
+         TOK_TABNAME
+            pcr_t1
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               key
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               value
+         TOK_SELEXPR
+            TOK_TABLE_OR_COL
+               ds
+      TOK_WHERE
+         or
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-08'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  1
+            and
+               =
+                  TOK_TABLE_OR_COL
+                     ds
+                  '2000-04-09'
+               =
+                  TOK_TABLE_OR_COL
+                     key
+                  2
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               key
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               value
+         TOK_TABSORTCOLNAMEASC
+            TOK_TABLE_OR_COL
+               ds
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: pcr_t1
+              Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: (((ds = '2000-04-08') and (key = 1)) or ((ds = '2000-04-09') and (key = 2))) (type: boolean)
+                Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: key (type: int), value (type: string), ds (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
+                    sort order: +++
+                    Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+                    tag: -1
+                    auto parallelism: false
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-09
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-09
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [pcr_t1]
+            /pcr_t1/ds=2000-04-09 [pcr_t1]
+        Reducer 2 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string)
+              outputColumnNames: _col0, _col1, _col2
+              Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2
+                    columns.types int:string:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+PREHOOK: Input: default@pcr_t1@ds=2000-04-09
+#### A masked pattern was here ####
+POSTHOOK: query: select key, value, ds from pcr_t1 where (ds='2000-04-08' and key=1) or (ds='2000-04-09' and key=2) order by key, value, ds
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-09
+#### A masked pattern was here ####
+2 val_2 2000-04-09
+PREHOOK: query: explain extended select * from pcr_t1 t1 join pcr_t1 t2 on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08' order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select * from pcr_t1 t1 join pcr_t1 t2 on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08' order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds
+               '2000-04-08'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: t2
+              Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: key is not null (type: boolean)
+                Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: key (type: int)
+                  sort order: +
+                  Map-reduce partition columns: key (type: int)
+                  Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                  tag: 1
+                  value expressions: value (type: string), ds (type: string)
+                  auto parallelism: true
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [t2]
+        Map 4 
+          Map Operator Tree:
+            TableScan
+              alias: t1
+              Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: key is not null (type: boolean)
+                Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: key (type: int)
+                  sort order: +
+                  Map-reduce partition columns: key (type: int)
+                  Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                  tag: 0
+                  value expressions: value (type: string), ds (type: string)
+                  auto parallelism: true
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [t1]
+        Reducer 2 
+          Needs Tagging: true
+          Reduce Operator Tree:
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+                1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+              outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
+              Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string)
+                  auto parallelism: false
+        Reducer 3 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), VALUE._col4 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4,_col5
+                    columns.types int:string:string:int:string:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from pcr_t1 t1 join pcr_t1 t2 on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08' order by t1.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@pcr_t1
+PREHOOK: Input: default@pcr_t1@ds=2000-04-08
+#### A masked pattern was here ####
+POSTHOOK: query: select * from pcr_t1 t1 join pcr_t1 t2 on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-08' order by t1.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@pcr_t1
+POSTHOOK: Input: default@pcr_t1@ds=2000-04-08
+#### A masked pattern was here ####
+0 val_0 2000-04-08 0 val_0 2000-04-08
+0 val_0 2000-04-08 0 val_0 2000-04-08
+0 val_0 2000-04-08 0 val_0 2000-04-08
+0 val_0 2000-04-08 0 val_0 2000-04-08
+0 val_0 2000-04-08 0 val_0 2000-04-08
+0 val_0 2000-04-08 0 val_0 2000-04-08
+0 val_0 2000-04-08 0 val_0 2000-04-08
+0 val_0 2000-04-08 0 val_0 2000-04-08
+0 val_0 2000-04-08 0 val_0 2000-04-08
+2 val_2 2000-04-08 2 val_2 2000-04-08
+4 val_4 2000-04-08 4 val_4 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+5 val_5 2000-04-08 5 val_5 2000-04-08
+8 val_8 2000-04-08 8 val_8 2000-04-08
+9 val_9 2000-04-08 9 val_9 2000-04-08
+10 val_10 2000-04-08 10 val_10 2000-04-08
+11 val_11 2000-04-08 11 val_11 2000-04-08
+12 val_12 2000-04-08 12 val_12 2000-04-08
+12 val_12 2000-04-08 12 val_12 2000-04-08
+12 val_12 2000-04-08 12 val_12 2000-04-08
+12 val_12 2000-04-08 12 val_12 2000-04-08
+15 val_15 2000-04-08 15 val_15 2000-04-08
+15 val_15 2000-04-08 15 val_15 2000-04-08
+15 val_15 2000-04-08 15 val_15 2000-04-08
+15 val_15 2000-04-08 15 val_15 2000-04-08
+17 val_17 2000-04-08 17 val_17 2000-04-08
+18 val_18 2000-04-08 18 val_18 2000-04-08
+18 val_18 2000-04-08 18 val_18 2000-04-08
+18 val_18 2000-04-08 18 val_18 2000-04-08
+18 val_18 2000-04-08 18 val_18 2000-04-08
+19 val_19 2000-04-08 19 val_19 2000-04-08
+PREHOOK: query: explain extended select * from pcr_t1 t1 join pcr_t1 t2 on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09' order by t1.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended select * from pcr_t1 t1 join pcr_t1 t2 on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09' order by t1.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+
+TOK_QUERY
+   TOK_FROM
+      TOK_JOIN
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t1
+         TOK_TABREF
+            TOK_TABNAME
+               pcr_t1
+            t2
+         and
+            and
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     key
+                  .
+                     TOK_TABLE_OR_COL
+                        t2
+                     key
+               =
+                  .
+                     TOK_TABLE_OR_COL
+                        t1
+                     ds
+                  '2000-04-08'
+            =
+               .
+                  TOK_TABLE_OR_COL
+                     t2
+                  ds
+               '2000-04-09'
+   TOK_INSERT
+      TOK_DESTINATION
+         TOK_DIR
+            TOK_TMP_FILE
+      TOK_SELECT
+         TOK_SELEXPR
+            TOK_ALLCOLREF
+      TOK_ORDERBY
+         TOK_TABSORTCOLNAMEASC
+            .
+               TOK_TABLE_OR_COL
+                  t1
+               key
+
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1)
+        Reducer 3 <- Reducer 2 (GROUP SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+          Map Operator Tree:
+            TableScan
+              alias: t2
+              Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: key is not null (type: boolean)
+                Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: key (type: int)
+                  sort order: +
+                  Map-reduce partition columns: key (type: int)
+                  Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                  tag: 1
+                  value expressions: value (type: string), ds (type: string)
+                  auto parallelism: true
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-09
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-09
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-09 [t2]
+        Map 4 
+          Map Operator Tree:
+            TableScan
+              alias: t1
+              Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+              GatherStats: false
+              Filter Operator
+                isSamplingPred: false
+                predicate: key is not null (type: boolean)
+                Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: key (type: int)
+                  sort order: +
+                  Map-reduce partition columns: key (type: int)
+                  Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE
+                  tag: 0
+                  value expressions: value (type: string), ds (type: string)
+                  auto parallelism: true
+          Path -> Alias:
+#### A masked pattern was here ####
+          Path -> Partition:
+#### A masked pattern was here ####
+            Partition
+              base file name: ds=2000-04-08
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              partition values:
+                ds 2000-04-08
+              properties:
+                COLUMN_STATS_ACCURATE true
+                bucket_count -1
+                columns key,value
+                columns.comments 
+                columns.types int:string
+#### A masked pattern was here ####
+                name default.pcr_t1
+                numFiles 1
+                numRows -1
+                partition_columns ds
+                partition_columns.types string
+                rawDataSize -1
+                serialization.ddl struct pcr_t1 { i32 key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                totalSize 180
+#### A masked pattern was here ####
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                properties:
+                  bucket_count -1
+                  columns key,value
+                  columns.comments 
+                  columns.types int:string
+#### A masked pattern was here ####
+                  name default.pcr_t1
+                  partition_columns ds
+                  partition_columns.types string
+                  serialization.ddl struct pcr_t1 { i32 key, string value}
+                  serialization.format 1
+                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+#### A masked pattern was here ####
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: default.pcr_t1
+              name: default.pcr_t1
+          Truncated Path -> Alias:
+            /pcr_t1/ds=2000-04-08 [t1]
+        Reducer 2 
+          Needs Tagging: true
+          Reduce Operator Tree:
+            Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+                1 {KEY.reducesinkkey0} {VALUE._col0} {VALUE._col1}
+              outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8
+              Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col6 (type: int), _col7 (type: string), _col8 (type: string)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+                  tag: -1
+                  value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: int), _col4 (type: string), _col5 (type: string)
+                  auto parallelism: false
+        Reducer 3 
+          Needs Tagging: false
+          Reduce Operator Tree:
+            Select Operator
+              expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), VALUE._col4 (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+              Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+#### A masked pattern was here ####
+                NumFilesPerFileSink: 1
+                Statistics: Num rows: 1 Data size: 198 Basic stats: COMPLETE Column stats: NONE
+#### A masked pattern was here ####
+                table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4,_col5
+                    columns.types int:string:string:int:string:string
+                    escape.delim \
+                    hive.serialization.extend.nesting.levels true
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                TotalFiles: 1
+                GatherStats: false
+                MultiFileSpray: false
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select * from pcr_t1 t1 join pcr_t1 t2 on 
t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09' order by t1.key +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Input: default@pcr_t1@ds=2000-04-09 +#### A masked pattern was here #### +POSTHOOK: query: select * from pcr_t1 t1 join pcr_t1 t2 on t1.key=t2.key and t1.ds='2000-04-08' and t2.ds='2000-04-09' order by t1.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 +#### A masked pattern was here #### +0 val_0 2000-04-08 0 val_0 2000-04-09 +0 val_0 2000-04-08 0 val_0 2000-04-09 +0 val_0 2000-04-08 0 val_0 2000-04-09 +0 val_0 2000-04-08 0 val_0 2000-04-09 +0 val_0 2000-04-08 0 val_0 2000-04-09 +0 val_0 2000-04-08 0 val_0 2000-04-09 +0 val_0 2000-04-08 0 val_0 2000-04-09 +0 val_0 2000-04-08 0 val_0 2000-04-09 +0 val_0 2000-04-08 0 val_0 2000-04-09 +2 val_2 2000-04-08 2 val_2 2000-04-09 +4 val_4 2000-04-08 4 val_4 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +5 val_5 2000-04-08 5 val_5 2000-04-09 +8 val_8 2000-04-08 8 val_8 2000-04-09 +9 val_9 2000-04-08 9 val_9 2000-04-09 +10 val_10 2000-04-08 10 val_10 2000-04-09 +11 val_11 2000-04-08 11 val_11 2000-04-09 +12 val_12 2000-04-08 12 val_12 2000-04-09 +12 val_12 2000-04-08 12 val_12 2000-04-09 +12 val_12 2000-04-08 12 val_12 2000-04-09 +12 val_12 2000-04-08 12 val_12 2000-04-09 +15 val_15 2000-04-08 15 val_15 2000-04-09 +15 val_15 2000-04-08 15 val_15 2000-04-09 +15 val_15 2000-04-08 15 val_15 2000-04-09 +15 val_15 2000-04-08 15 val_15 2000-04-09 +17 val_17 2000-04-08 17 val_17 2000-04-09 +18 val_18 2000-04-08 18 val_18 2000-04-09 +18 val_18 2000-04-08 18 val_18 2000-04-09 +18 val_18 2000-04-08 18 val_18 2000-04-09 +18 val_18 2000-04-08 18 val_18 2000-04-09 +19 val_19 2000-04-08 19 val_19 2000-04-09 +PREHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-11') select * from src where key < 20 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_t1@ds=2000-04-11 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table pcr_t1 partition (ds='2000-04-11') select * from src where key < 20 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_t1@ds=2000-04-11 +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_t1 PARTITION(ds=2000-04-11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds>'2000-04-08' and ds<'2000-04-11') or (ds>='2000-04-08' and ds<='2000-04-11' and key=2) order by key, value, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds>'2000-04-08' and ds<'2000-04-11') or (ds>='2000-04-08' and ds<='2000-04-11' and key=2) order by key, value, ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR 
+ TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + > + TOK_TABLE_OR_COL + ds + '2000-04-08' + < + TOK_TABLE_OR_COL + ds + '2000-04-11' + and + and + >= + TOK_TABLE_OR_COL + ds + '2000-04-08' + <= + TOK_TABLE_OR_COL + ds + '2000-04-11' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 4 Data size: 720 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (((ds > '2000-04-08') and (ds < '2000-04-11')) or (key = 2)) (type: boolean) + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name 
default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-10 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + 
name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] + /pcr_t1/ds=2000-04-10 [pcr_t1] + /pcr_t1/ds=2000-04-11 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 2 Data size: 360 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcr_t1 where (ds>'2000-04-08' and ds<'2000-04-11') or (ds>='2000-04-08' and ds<='2000-04-11' and key=2) order by key, value, ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Input: default@pcr_t1@ds=2000-04-09 +PREHOOK: Input: default@pcr_t1@ds=2000-04-10 +PREHOOK: Input: default@pcr_t1@ds=2000-04-11 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcr_t1 where (ds>'2000-04-08' and ds<'2000-04-11') or (ds>='2000-04-08' and ds<='2000-04-11' and key=2) order by key, value, ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-11 +#### A masked pattern was here #### +0 val_0 2000-04-09 +0 val_0 2000-04-09 +0 val_0 2000-04-09 +0 val_0 2000-04-10 +0 val_0 2000-04-10 +0 val_0 2000-04-10 +2 val_2 2000-04-08 +2 val_2 2000-04-09 +2 val_2 2000-04-10 +2 val_2 2000-04-11 +4 val_4 2000-04-09 +4 val_4 2000-04-10 +5 val_5 2000-04-09 +5 val_5 2000-04-09 +5 val_5 2000-04-09 +5 val_5 2000-04-10 +5 val_5 2000-04-10 +5 val_5 2000-04-10 +8 val_8 2000-04-09 +8 val_8 2000-04-10 +9 val_9 2000-04-09 +9 val_9 2000-04-10 +10 val_10 2000-04-09 +10 val_10 2000-04-10 +11 val_11 2000-04-09 +11 val_11 2000-04-10 +12 val_12 2000-04-09 +12 val_12 2000-04-09 +12 val_12 2000-04-10 +12 val_12 2000-04-10 +15 val_15 2000-04-09 +15 val_15 2000-04-09 +15 val_15 2000-04-10 +15 val_15 2000-04-10 +17 val_17 2000-04-09 +17 val_17 2000-04-10 +18 val_18 2000-04-09 +18 val_18 2000-04-09 +18 val_18 2000-04-10 +18 val_18 2000-04-10 +19 val_19 2000-04-09 +19 val_19 2000-04-10 +PREHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds>'2000-04-08' and 
ds<'2000-04-11') or (ds<='2000-04-09' and key=2) order by key, value, ds +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value, ds from pcr_t1 where (ds>'2000-04-08' and ds<'2000-04-11') or (ds<='2000-04-09' and key=2) order by key, value, ds +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_WHERE + or + and + > + TOK_TABLE_OR_COL + ds + '2000-04-08' + < + TOK_TABLE_OR_COL + ds + '2000-04-11' + and + <= + TOK_TABLE_OR_COL + ds + '2000-04-09' + = + TOK_TABLE_OR_COL + key + 2 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + value + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 3 Data size: 540 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: ((ds > '2000-04-08') or ((ds <= '2000-04-09') and (key = 2))) (type: boolean) + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: string), ds (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file 
name: ds=2000-04-09 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-09 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-10 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-10 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + /pcr_t1/ds=2000-04-09 [pcr_t1] + /pcr_t1/ds=2000-04-10 [pcr_t1] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2 + columns.types int:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds from pcr_t1 where (ds>'2000-04-08' and ds<'2000-04-11') or (ds<='2000-04-09' and key=2) order by key, value, ds +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Input: default@pcr_t1@ds=2000-04-09 +PREHOOK: Input: default@pcr_t1@ds=2000-04-10 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds from pcr_t1 where (ds>'2000-04-08' and ds<'2000-04-11') or (ds<='2000-04-09' and key=2) order by key, value, ds +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-09 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-10 +#### A masked pattern was here #### +0 val_0 2000-04-09 +0 val_0 2000-04-09 +0 val_0 2000-04-09 +0 val_0 2000-04-10 +0 val_0 2000-04-10 +0 val_0 2000-04-10 +2 val_2 2000-04-08 +2 val_2 2000-04-09 +2 val_2 2000-04-10 +4 val_4 2000-04-09 +4 val_4 2000-04-10 +5 val_5 2000-04-09 +5 val_5 2000-04-09 +5 val_5 2000-04-09 +5 val_5 2000-04-10 +5 val_5 2000-04-10 +5 val_5 2000-04-10 +8 val_8 2000-04-09 +8 val_8 2000-04-10 +9 val_9 2000-04-09 +9 val_9 2000-04-10 +10 val_10 2000-04-09 +10 val_10 2000-04-10 +11 val_11 2000-04-09 +11 val_11 2000-04-10 +12 val_12 2000-04-09 +12 val_12 2000-04-09 +12 val_12 2000-04-10 +12 val_12 2000-04-10 +15 val_15 2000-04-09 +15 val_15 2000-04-09 +15 val_15 2000-04-10 +15 val_15 2000-04-10 +17 val_17 2000-04-09 +17 val_17 2000-04-10 +18 val_18 2000-04-09 +18 val_18 2000-04-09 +18 val_18 2000-04-10 +18 val_18 2000-04-10 +19 val_19 2000-04-09 +19 val_19 2000-04-10 +PREHOOK: query: create table pcr_t2 (key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcr_t2 +POSTHOOK: query: create table pcr_t2 (key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcr_t2 +PREHOOK: query: create table pcr_t3 (key int, value string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcr_t3 +POSTHOOK: query: create table pcr_t3 (key int, value string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcr_t3 +PREHOOK: query: explain extended +from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + pcr_t2 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + TOK_INSERT + 
TOK_DESTINATION + TOK_TAB + TOK_TABNAME + pcr_t3 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t2 + serialization.ddl struct pcr_t2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Select Operator + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 2 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t3 + serialization.ddl struct pcr_t3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t3 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t2 + serialization.ddl struct pcr_t2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t2 + + Stage: Stage-4 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t3 + serialization.ddl struct pcr_t3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t3 + + Stage: Stage-5 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Output: default@pcr_t2 +PREHOOK: Output: default@pcr_t3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcr_t2 +POSTHOOK: Output: default@pcr_t3 +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: 
Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain extended +from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + pcr_t1 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + pcr_t2 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 2 + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + pcr_t3 + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '2000-04-08' + = + TOK_TABLE_OR_COL + key + 3 + + +STAGE DEPENDENCIES: + Stage-2 is a root stage + Stage-3 depends on stages: Stage-2 + Stage-0 depends on stages: Stage-3 + Stage-4 depends on stages: Stage-0 + Stage-1 depends on stages: Stage-3 + Stage-5 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-2 + Spark +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: pcr_t1 + Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 2) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 2 (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t2 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct pcr_t2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t2 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Filter Operator + isSamplingPred: false + predicate: (key = 3) (type: boolean) + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + Select Operator + expressions: 3 (type: int), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 2 +#### A masked 
pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 0 Data size: 0 Basic stats: NONE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t3 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct pcr_t3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t3 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: ds=2000-04-08 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2000-04-08 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + numFiles 1 + numRows -1 + partition_columns ds + partition_columns.types string + rawDataSize -1 + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t1 + partition_columns ds + partition_columns.types string + serialization.ddl struct pcr_t1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t1 + name: default.pcr_t1 + Truncated Path -> Alias: + /pcr_t1/ds=2000-04-08 [pcr_t1] + + Stage: Stage-3 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t2 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct pcr_t2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t2 + + Stage: Stage-4 + Stats-Aggr Operator +#### A masked pattern was here #### + + Stage: Stage-1 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.pcr_t3 + numFiles 1 + numRows -1 + rawDataSize -1 + serialization.ddl struct pcr_t3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 180 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.pcr_t3 + + Stage: Stage-5 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_t1 +PREHOOK: Input: default@pcr_t1@ds=2000-04-08 +PREHOOK: Output: default@pcr_t2 +PREHOOK: Output: default@pcr_t3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: from pcr_t1 +insert overwrite table pcr_t2 select key, value where ds='2000-04-08' and key=2 +insert overwrite table pcr_t3 select key, value where ds='2000-04-08' and key=3 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: Input: default@pcr_t1@ds=2000-04-08 +POSTHOOK: Output: default@pcr_t2 +POSTHOOK: Output: default@pcr_t3 +POSTHOOK: Lineage: pcr_t2.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t2.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Lineage: pcr_t3.key SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: pcr_t3.value SIMPLE [(pcr_t1)pcr_t1.FieldSchema(name:value, type:string, comment:null), ] +PREHOOK: query: explain extended select key, value from srcpart where ds='2008-04-08' and hr=11 order by key limit 10 +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value from srcpart where ds='2008-04-08' and hr=11 order by key limit 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_WHERE + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + = + TOK_TABLE_OR_COL + hr + 11 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_LIMIT + 10 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 500 Data size: 5312 Basic 
stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1 + columns.types string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: select key, value from srcpart where ds='2008-04-04' and hr=11 order by key limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +#### A masked pattern was here #### +POSTHOOK: query: select key, value from srcpart where ds='2008-04-04' and hr=11 order by key limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +#### A masked pattern was here #### +PREHOOK: query: explain extended select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr +PREHOOK: type: QUERY +POSTHOOK: 
query: explain extended select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_TABLE_OR_COL + hr + TOK_WHERE + and + and + = + TOK_TABLE_OR_COL + ds + '2008-04-08' + or + = + TOK_TABLE_OR_COL + hr + '11' + = + TOK_TABLE_OR_COL + hr + '12' + = + TOK_TABLE_OR_COL + key + 11 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + hr + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 11) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '11' (type: string), _col2 (type: string), _col3 (type: string) + sort order: +++ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-08/hr=12 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: '11' (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds, hr from srcpart where ds='2008-04-08' and (hr='11' or hr='12') and key=11 order by key, ds, hr +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +11 val_11 2008-04-08 11 +11 val_11 2008-04-08 12 +PREHOOK: query: explain extended select key, value, ds, hr from srcpart where hr='11' and key=11 
order by key, ds, hr +PREHOOK: type: QUERY +POSTHOOK: query: explain extended select key, value, ds, hr from srcpart where hr='11' and key=11 order by key, ds, hr +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_SELEXPR + TOK_TABLE_OR_COL + key + TOK_SELEXPR + TOK_TABLE_OR_COL + value + TOK_SELEXPR + TOK_TABLE_OR_COL + ds + TOK_SELEXPR + TOK_TABLE_OR_COL + hr + TOK_WHERE + and + = + TOK_TABLE_OR_COL + hr + '11' + = + TOK_TABLE_OR_COL + key + 11 + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + key + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + ds + TOK_TABSORTCOLNAMEASC + TOK_TABLE_OR_COL + hr + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: srcpart + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key = 11) (type: boolean) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: value (type: string), ds (type: string), hr (type: string) + outputColumnNames: _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: '11' (type: string), _col2 (type: string), _col3 (type: string) + sort order: +++ + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string) + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-09 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments defaultdefault + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [srcpart] + /srcpart/ds=2008-04-09/hr=11 [srcpart] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: '11' (type: string), VALUE._col0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types string:string:string:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select key, value, ds, hr from srcpart where hr='11' and key=11 order by key, ds, hr +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +POSTHOOK: query: select key, value, ds, hr from srcpart where hr='11' and key=11 order by key, ds, hr +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +#### A masked pattern was here #### +11 val_11 2008-04-08 11 +11 val_11 2008-04-09 11 +PREHOOK: query: drop table pcr_t1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pcr_t1 +PREHOOK: Output: default@pcr_t1 +POSTHOOK: query: drop table pcr_t1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pcr_t1 +POSTHOOK: 
Output: default@pcr_t1 +PREHOOK: query: drop table pcr_t2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pcr_t2 +PREHOOK: Output: default@pcr_t2 +POSTHOOK: query: drop table pcr_t2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pcr_t2 +POSTHOOK: Output: default@pcr_t2 +PREHOOK: query: drop table pcr_t3 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pcr_t3 +PREHOOK: Output: default@pcr_t3 +POSTHOOK: query: drop table pcr_t3 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pcr_t3 +POSTHOOK: Output: default@pcr_t3 +PREHOOK: query: -- Test cases where a non-boolean ds expression has the same and different values for all possible ds values: +drop table pcr_foo +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Test cases where a non-boolean ds expression has the same and different values for all possible ds values: +drop table pcr_foo +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table pcr_foo (key int, value string) partitioned by (ds int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@pcr_foo +POSTHOOK: query: create table pcr_foo (key int, value string) partitioned by (ds int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@pcr_foo +PREHOOK: query: insert overwrite table pcr_foo partition (ds=3) select * from src where key < 10 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_foo@ds=3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table pcr_foo partition (ds=3) select * from src where key < 10 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_foo@ds=3 +POSTHOOK: Lineage: pcr_foo PARTITION(ds=3).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_foo PARTITION(ds=3).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcr_foo partition (ds=5) select * from src where key < 10 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_foo@ds=5 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table pcr_foo partition (ds=5) select * from src where key < 10 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_foo@ds=5 +POSTHOOK: Lineage: pcr_foo PARTITION(ds=5).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_foo PARTITION(ds=5).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table pcr_foo partition (ds=7) select * from src where key < 10 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@pcr_foo@ds=7 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table pcr_foo partition (ds=7) select * from src where key < 10 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@pcr_foo@ds=7 +POSTHOOK: Lineage: pcr_foo PARTITION(ds=7).key EXPRESSION
[(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: pcr_foo PARTITION(ds=7).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- the condition is 'true' for all the 3 partitions (ds=3,5,7): +select key, value, ds from pcr_foo where (ds % 2 == 1) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_foo +PREHOOK: Input: default@pcr_foo@ds=3 +PREHOOK: Input: default@pcr_foo@ds=5 +PREHOOK: Input: default@pcr_foo@ds=7 +#### A masked pattern was here #### +POSTHOOK: query: -- the condition is 'true' for all the 3 partitions (ds=3,5,7): +select key, value, ds from pcr_foo where (ds % 2 == 1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_foo +POSTHOOK: Input: default@pcr_foo@ds=3 +POSTHOOK: Input: default@pcr_foo@ds=5 +POSTHOOK: Input: default@pcr_foo@ds=7 +#### A masked pattern was here #### +0 val_0 3 +0 val_0 3 +0 val_0 3 +2 val_2 3 +4 val_4 3 +5 val_5 3 +5 val_5 3 +5 val_5 3 +8 val_8 3 +9 val_9 3 +0 val_0 5 +0 val_0 5 +0 val_0 5 +2 val_2 5 +4 val_4 5 +5 val_5 5 +5 val_5 5 +5 val_5 5 +8 val_8 5 +9 val_9 5 +0 val_0 7 +0 val_0 7 +0 val_0 7 +2 val_2 7 +4 val_4 7 +5 val_5 7 +5 val_5 7 +5 val_5 7 +8 val_8 7 +9 val_9 7 +PREHOOK: query: -- the condition is 'true' for partitions (ds=3,5) but 'false' for partition ds=7: +select key, value, ds from pcr_foo where (ds / 3 < 2) +PREHOOK: type: QUERY +PREHOOK: Input: default@pcr_foo +PREHOOK: Input: default@pcr_foo@ds=3 +PREHOOK: Input: default@pcr_foo@ds=5 +#### A masked pattern was here #### +POSTHOOK: query: -- the condition is 'true' for partitions (ds=3,5) but 'false' for partition ds=7: +select key, value, ds from pcr_foo where (ds / 3 < 2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@pcr_foo +POSTHOOK: Input: default@pcr_foo@ds=3 +POSTHOOK: Input: default@pcr_foo@ds=5 +#### A masked pattern was here #### +0 val_0 3 +0 val_0 3 +0 val_0 3 +2 val_2 3 +4 val_4 3 +5 val_5 3 +5 val_5 3 +5 val_5 3 +8 val_8 3 +9 val_9 3 +0 val_0 5 +0 val_0 5 +0 val_0 5 +2 val_2 5 +4 val_4 5 +5 val_5 5 +5 val_5 5 +5 val_5 5 +8 val_8 5 +9 val_9 5 +PREHOOK: query: drop table pcr_foo +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@pcr_foo +PREHOOK: Output: default@pcr_foo +POSTHOOK: query: drop table pcr_foo +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@pcr_foo +POSTHOOK: Output: default@pcr_foo
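Note on the two queries above: the pruner evaluates a non-boolean ds expression per partition value at compile time, which shows up in the PREHOOK Inputs. For (ds % 2 == 1), 3, 5 and 7 are all odd, so all three partitions are read; for (ds / 3 < 2), 3/3 = 1.0 and 5/3 = 1.67 pass but 7/3 = 2.33 does not, so default@pcr_foo@ds=7 never appears as an input. A hypothetical check, run while pcr_foo still exists (not part of this diff):

-- hypothetical illustration only
explain extended select key from pcr_foo where (ds / 3 < 2);
-- Path -> Alias should list only the ds=3 and ds=5 partition directories

+PREHOOK: query: -- Cover org.apache.hadoop.hive.ql.optimizer.pcr.PcrExprProcFactory.FieldExprProcessor. +-- Create a table with struct data: +create table ab(strct struct<a:int, b:string>) +row format delimited + fields terminated by '\t' + collection items terminated by '\001' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@ab +POSTHOOK: query: -- Cover org.apache.hadoop.hive.ql.optimizer.pcr.PcrExprProcFactory.FieldExprProcessor.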
+-- Create a table with struct data: +create table ab(strct struct<a:int, b:string>) +row format delimited + fields terminated by '\t' + collection items terminated by '\001' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@ab +PREHOOK: query: load data local inpath '../../data/files/kv1.txt' +overwrite into table ab +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@ab +POSTHOOK: query: load data local inpath '../../data/files/kv1.txt' +overwrite into table ab +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@ab +PREHOOK: query: -- Create partitioned table with struct data: +drop table foo_field +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- Create partitioned table with struct data: +drop table foo_field +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table foo_field (s struct<a:int,b:string>) partitioned by (ds int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@foo_field +POSTHOOK: query: create table foo_field (s struct<a:int,b:string>) partitioned by (ds int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@foo_field +PREHOOK: query: insert overwrite table foo_field partition (ds=5) select strct from ab where strct.a < 10 limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@ab +PREHOOK: Output: default@foo_field@ds=5 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table foo_field partition (ds=5) select strct from ab where strct.a < 10 limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ab +POSTHOOK: Output: default@foo_field@ds=5 +POSTHOOK: Lineage: foo_field PARTITION(ds=5).s SIMPLE [(ab)ab.FieldSchema(name:strct, type:struct<a:int,b:string>, comment:null), ] +PREHOOK: query: insert overwrite table foo_field partition (ds=7) select strct from ab where strct.a > 190 limit 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@ab +PREHOOK: Output: default@foo_field@ds=7 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: insert overwrite table foo_field partition (ds=7) select strct from ab where strct.a > 190 limit 2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@ab +POSTHOOK: Output: default@foo_field@ds=7 +POSTHOOK: Lineage: foo_field PARTITION(ds=7).s SIMPLE [(ab)ab.FieldSchema(name:strct, type:struct<a:int,b:string>, comment:null), ] +PREHOOK: query: select s,ds from foo_field where ((ds + s.a) > 0) order by ds,s +PREHOOK: type: QUERY +PREHOOK: Input: default@foo_field +PREHOOK: Input: default@foo_field@ds=5 +PREHOOK: Input: default@foo_field@ds=7 +#### A masked pattern was here #### +POSTHOOK: query: select s,ds from foo_field where ((ds + s.a) > 0) order by ds,s +POSTHOOK: type: QUERY +POSTHOOK: Input: default@foo_field +POSTHOOK: Input: default@foo_field@ds=5 +POSTHOOK: Input: default@foo_field@ds=7 +#### A masked pattern was here #### +{"a":0,"b":"val_0"} 5 +{"a":4,"b":"val_4"} 5 +{"a":238,"b":"val_238"} 7 +{"a":311,"b":"val_311"} 7 +PREHOOK: query: drop table foo_field +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@foo_field +PREHOOK: Output: default@foo_field +POSTHOOK: query: drop table foo_field +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@foo_field +POSTHOOK: Output: default@foo_field
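The next golden file covers the smb_mapjoin_13.q test. For context, the smb_mapjoin_*.q tests are normally driven with sort-merge-bucket settings along these lines (an assumption about the .q file, which this diff does not show):

set hive.optimize.bucketmapjoin = true;
set hive.optimize.bucketmapjoin.sortedmerge = true;
set hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

Note that the recorded Spark plans below still contain a shuffle-side Join Operator rather than a sorted merge bucket map join, so these baselines capture the Spark branch not yet performing the conversion that the test comments describe.

diff --git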
ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out new file mode 100644 index 0000000..0319137 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_13.q.out @@ -0,0 +1,638 @@ +PREHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are joined on columns with different names + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are joined on columns with different names + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (value INT, key STRING) CLUSTERED BY (value) SORTED BY (value ASC) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table3 +POSTHOOK: query: CREATE TABLE test_table3 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table3 +PREHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table4 +POSTHOOK: query: CREATE TABLE test_table4 (key INT, value STRING) CLUSTERED BY (key, value) SORTED BY (value ASC, key ASC) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table4 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +INSERT OVERWRITE TABLE test_table3 SELECT * +INSERT OVERWRITE TABLE test_table4 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +PREHOOK: Output: default@test_table2 +PREHOOK: Output: default@test_table3 +PREHOOK: Output: default@test_table4 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be 
connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +INSERT OVERWRITE TABLE test_table3 SELECT * +INSERT OVERWRITE TABLE test_table4 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Output: default@test_table3 +POSTHOOK: Output: default@test_table4 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table3.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table4.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Join data from 2 tables on their respective sorted columns (one each, with different names) and +-- verify sort merge join is used +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- Join data from 2 tables on their respective sorted columns (one each, with different names) and +-- verify sort merge join is used +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . 
+ TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: value is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: int) + sort order: + + Map-reduce partition columns: value (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name value + columns value,key + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 value, string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name value + columns value,key + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 value, string key} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + name: default.test_table2 + Truncated Path -> Alias: + /test_table2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 1080 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM 
test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.value ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2 +PREHOOK: query: -- Join data from 2 tables on their respective columns (two each, with the same names but sorted +-- with different priorities) and verify sort merge join is not used +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- Join data from 2 tables on their respective columns (two each, with the same names but sorted +-- with different priorities) and verify sort merge join is not used +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table3 + a + TOK_TABREF + TOK_TABNAME + test_table4 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: UDFToDouble(value) is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(value) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(value) (type: double) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key (type: int), value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table4 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table4 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table4 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here 
#### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table4 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table4 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table4 + name: default.test_table4 + Truncated Path -> Alias: + /test_table4 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: UDFToDouble(key) is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: UDFToDouble(key) (type: double) + sort order: + + Map-reduce partition columns: UDFToDouble(key) (type: double) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key (type: int), value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table3 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table3 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table3 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table3 + name: default.test_table3 + Truncated Path -> Alias: + /test_table3 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} {VALUE._col1} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 
(type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 1080 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table3 +PREHOOK: Input: default@test_table4 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table3 a JOIN test_table4 b ON a.key = b.value ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table3 +POSTHOOK: Input: default@test_table4 +#### A masked pattern was here ####
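Note on the test_table3/test_table4 query above: a.key is INT while b.value is STRING, so both join keys are wrapped in UDFToDouble and the shuffle happens on double-typed keys rather than on the raw sorted columns, which means the bucketed sort order cannot be exploited. The empty result is also expected, because UDFToDouble over values like 'val_0' yields NULL, and NULL keys never match in an inner join. A hypothetical one-liner to confirm the coercion (not part of this diff):

-- hypothetical illustration only
select cast('val_0' as double); -- NULL, so a.key = b.value can never match

diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out new file mode 100644 index 0000000..7849e78 --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_15.q.out @@ -0,0 +1,1294 @@ +PREHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are sorted on columns that are a superset +-- of the join columns + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- This test verifies that the sort merge join optimizer works when the tables are sorted on columns that are a superset +-- of the join columns + +-- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS +PREHOOK: type: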
CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, value ASC) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +PREHOOK: Output: default@test_table2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- it should be converted to a sort-merge join, since the first sort column (#join columns = 1) contains the join columns +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- it should be converted to a sort-merge join, since the first sort column (#join columns = 1) contains the join columns +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . 
+ TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + name: default.test_table2 + Truncated Path -> Alias: + /test_table2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 55 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: key is not null (type: boolean) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 28 Data size: 2958 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + 
COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,value + columns.comments + columns.types int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col5, _col6 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 3253 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1080 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 1080 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3 + columns.types int:string:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM 
test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +2 val_2 2 val_2
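Note: both tables here are sorted by (key ASC, value ASC) and the join is on key alone, so the join columns form a leading prefix of the sort order; that prefix property is what the test comments mean by the sort columns being a superset of the join columns. A hypothetical contrast, assuming two tables t1 and t2 bucketed and sorted by (key, value), not part of this diff:

-- hypothetical illustration only
SELECT /*+mapjoin(b)*/ * FROM t1 a JOIN t2 b ON a.key = b.key;
-- ON a.key = b.key     : join columns are a sort prefix, eligible for sort-merge conversion
-- ON a.value = b.value : value is not a leading sort column, not eligible

+PREHOOK: query: DROP TABLE test_table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table1 +POSTHOOK: query: DROP TABLE test_table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table1 +PREHOOK: query: DROP TABLE test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: DROP TABLE test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, key2 INT, value STRING) CLUSTERED BY (key) SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT key, key, value +INSERT OVERWRITE TABLE test_table2 SELECT key, key, value +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +PREHOOK: Output: default@test_table2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT key, key, value +INSERT OVERWRITE TABLE test_table2 SELECT key, key, value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: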
test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + key2 + . + TOK_TABLE_OR_COL + b + key2 + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and key2 is not null) (type: boolean) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + 
bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + name: default.test_table2 + Truncated Path -> Alias: + /test_table2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and key2 is not null) (type: boolean) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), key2 (type: int) + sort order: ++ + Map-reduce partition columns: key (type: int), key2 (type: int) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0} + 1 {KEY.reducesinkkey0} {KEY.reducesinkkey1} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1130 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 1130 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:string:int:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.key2 = b.key2 ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +2 2 val_2 2 2 val_2 +PREHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns +-- even if the order is not the same +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- it should be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) contain the join columns +-- even if the order is not the same +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key2 + . + TOK_TABLE_OR_COL + b + key2 + = + . + TOK_TABLE_OR_COL + a + key + . 
+ TOK_TABLE_OR_COL + b + key + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . + TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key2 is not null and key is not null) (type: boolean) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key2 (type: int), key (type: int) + sort order: ++ + Map-reduce partition columns: key2 (type: int), key (type: int) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + name: default.test_table2 + Truncated Path -> Alias: + /test_table2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key2 is not null and key is not null) (type: boolean) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key2 (type: int), key (type: int) + sort order: ++ + Map-reduce partition columns: key2 (type: int), key (type: int) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + 
tag: 0 + value expressions: value (type: string) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + 1 {KEY.reducesinkkey1} {KEY.reducesinkkey0} {VALUE._col0} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1130 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 1130 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types int:int:string:int:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key2 = b.key2 and a.key = b.key ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +2 2 val_2 2 2 val_2 +PREHOOK: query: -- it should not be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) do not contain all +-- the join columns +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: -- it should not be converted to a sort-merge join, since the first 2 sort columns (#join columns = 2) do not contain all +-- the join columns +EXPLAIN EXTENDED +SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_JOIN + TOK_TABREF + TOK_TABNAME + test_table1 + a + TOK_TABREF + TOK_TABNAME + test_table2 + b + and + = + . + TOK_TABLE_OR_COL + a + key + . + TOK_TABLE_OR_COL + b + key + = + . + TOK_TABLE_OR_COL + a + value + . + TOK_TABLE_OR_COL + b + value + TOK_INSERT + TOK_DESTINATION + TOK_DIR + TOK_TMP_FILE + TOK_SELECT + TOK_HINTLIST + TOK_HINT + TOK_MAPJOIN + TOK_HINTARGLIST + b + TOK_SELEXPR + TOK_ALLCOLREF + TOK_ORDERBY + TOK_TABSORTCOLNAMEASC + . 
+ TOK_TABLE_OR_COL + a + key + TOK_LIMIT + 10 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP SORT, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: string) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + tag: 1 + value expressions: key2 (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: test_table2 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table2 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table2 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table2 + name: default.test_table2 + Truncated Path -> Alias: + /test_table2 [b] + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 71 Data size: 7718 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int), value (type: string) + sort order: ++ + Map-reduce partition columns: key (type: int), value (type: string) + Statistics: Num rows: 18 Data size: 1956 Basic stats: COMPLETE Column stats: NONE + tag: 0 + value expressions: key2 (type: int) + auto parallelism: true + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: 
test_table1 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + COLUMN_STATS_ACCURATE true + SORTBUCKETCOLSPREFIX TRUE + bucket_count 16 + bucket_field_name key + columns key,key2,value + columns.comments + columns.types int:int:string +#### A masked pattern was here #### + name default.test_table1 + numFiles 16 + numRows -1 + rawDataSize -1 + serialization.ddl struct test_table1 { i32 key, i32 key2, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 7718 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.test_table1 + name: default.test_table1 + Truncated Path -> Alias: + /test_table1 [a] + Reducer 2 + Needs Tagging: true + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {KEY.reducesinkkey0} {VALUE._col0} {KEY.reducesinkkey1} + 1 {KEY.reducesinkkey0} {VALUE._col0} {KEY.reducesinkkey1} + outputColumnNames: _col0, _col1, _col2, _col6, _col7, _col8 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col6 (type: int), _col7 (type: int), _col8 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + tag: -1 + value expressions: _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + auto parallelism: false + Reducer 3 + Needs Tagging: false + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 19 Data size: 2151 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1130 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 0 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 10 Data size: 1130 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + columns _col0,_col1,_col2,_col3,_col4,_col5 + columns.types 
int:int:string:int:int:string + escape.delim \ + hive.serialization.extend.nesting.levels true + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + TotalFiles: 1 + GatherStats: false + MultiFileSpray: false + + Stage: Stage-0 + Fetch Operator + limit: 10 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ * FROM test_table1 a JOIN test_table2 b ON a.key = b.key and a.value = b.value ORDER BY a.key LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +0 0 val_0 0 0 val_0 +2 2 val_2 2 2 val_2 +PREHOOK: query: DROP TABLE test_table1 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table1 +PREHOOK: Output: default@test_table1 +POSTHOOK: query: DROP TABLE test_table1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table1 +POSTHOOK: Output: default@test_table1 +PREHOOK: query: DROP TABLE test_table2 +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@test_table2 +PREHOOK: Output: default@test_table2 +POSTHOOK: query: DROP TABLE test_table2 +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_table2 +POSTHOOK: Output: default@test_table2 diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out new file mode 100644 index 0000000..11ffefd --- /dev/null +++ ql/src/test/results/clientpositive/spark/smb_mapjoin_16.q.out @@ -0,0 +1,140 @@ +PREHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table1 +POSTHOOK: query: -- Create bucketed and sorted tables +CREATE TABLE test_table1 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table1 +PREHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_table2 +POSTHOOK: query: CREATE TABLE test_table2 (key INT, value STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_table2 +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_table1 +PREHOOK: Output: default@test_table2 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to 
+POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE test_table1 SELECT * +INSERT OVERWRITE TABLE test_table2 SELECT * +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_table1 +POSTHOOK: Output: default@test_table2 +POSTHOOK: Lineage: test_table1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table1.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: test_table2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: -- Mapjoin followed by a aggregation should be performed in a single MR job +EXPLAIN +SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1), Map 4 (GROUP PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Map 4 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 1453 Data size: 5812 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 727 Data size: 2908 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 + 1 + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 799 Data size: 3198 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@test_table1 +PREHOOK: Input: default@test_table2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT /*+mapjoin(b)*/ count(*) FROM test_table1 a JOIN test_table2 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_table1 +POSTHOOK: Input: default@test_table2 +#### A masked pattern was here #### +1028 diff --git ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out new file mode 100644 index 0000000..844d36f --- /dev/null +++ ql/src/test/results/clientpositive/spark/table_access_keys_stats.q.out @@ -0,0 +1,551 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS +-- This test is used for testing the TableAccessAnalyzer + +CREATE TABLE T1(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/T1.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +PREHOOK: query: CREATE TABLE T2(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T2 +PREHOOK: query: CREATE TABLE T3(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T3 +PREHOOK: query: -- Simple group-by queries +SELECT key, count(1) FROM T1 GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_2 +Table:default@t1 +Keys:key + +1 1 +2 1 +3 1 +7 1 +8 2 +PREHOOK: query: SELECT key, val, count(1) FROM T1 GROUP BY key, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_2 +Table:default@t1 +Keys:key,val + +1 11 1 +2 12 1 +3 13 1 +7 17 1 +8 18 1 +8 28 1 +PREHOOK: query: -- With subqueries and column aliases +SELECT key, count(1) FROM (SELECT key, val FROM T1) subq1 GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_3 +Table:default@t1 +Keys:key + +1 1 +2 1 +3 1 +7 1 +8 2 +PREHOOK: query: SELECT k, count(1) FROM (SELECT key as k, val as v FROM T1) subq1 GROUP BY k +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_3 +Table:default@t1 +Keys:key + +1 1 +2 1 +3 1 +7 1 +8 2 +PREHOOK: query: -- With constants +SELECT 1, key, count(1) FROM T1 GROUP BY 1, key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_2 +Table:default@t1 +Keys:key + +1 1 1 +1 2 1 +1 3 1 +1 7 1 +1 8 2 +PREHOOK: query: SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_2 +Table:default@t1 +Keys:key,val + +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 +8 1 18 1 +8 1 28 1 +PREHOOK: query: SELECT key, 1, val, 2, count(1) FROM T1 GROUP BY key, 1, val, 2 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was 
here #### +Operator:GBY_2 +Table:default@t1 +Keys:key,val + +1 1 11 2 1 +2 1 12 2 1 +3 1 13 2 1 +7 1 17 2 1 +8 1 18 2 1 +8 1 28 2 1 +PREHOOK: query: -- no mapping with functions +SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +1 2.0 1 +2 3.0 1 +3 4.0 1 +7 8.0 1 +8 9.0 2 +PREHOOK: query: SELECT key + key, sum(cnt) from +(SELECT key, count(1) as cnt FROM T1 GROUP BY key) subq1 +group by key + key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_2 +Table:default@t1 +Keys:key + +14.0 1 +16.0 2 +2.0 1 +4.0 1 +6.0 1 +PREHOOK: query: -- group by followed by union +SELECT * FROM ( +SELECT key, count(1) as c FROM T1 GROUP BY key + UNION ALL +SELECT key, count(1) as c FROM T1 GROUP BY key +) subq1 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_2 +Table:default@t1 +Keys:key + +Operator:GBY_8 +Table:default@t1 +Keys:key + +1 1 +1 1 +2 1 +2 1 +3 1 +3 1 +7 1 +7 1 +8 2 +8 2 +PREHOOK: query: -- group by followed by a join +SELECT * FROM +(SELECT key, count(1) as c FROM T1 GROUP BY key) subq1 +JOIN +(SELECT key, count(1) as c FROM T1 GROUP BY key) subq2 +ON subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_2 +Table:default@t1 +Keys:key + +Operator:GBY_8 +Table:default@t1 +Keys:key + +1 1 1 1 +2 1 2 1 +3 1 3 1 +7 1 7 1 +8 2 8 2 +PREHOOK: query: SELECT * FROM +(SELECT key, count(1) as c FROM T1 GROUP BY key) subq1 +JOIN +(SELECT key, val, count(1) as c FROM T1 GROUP BY key, val) subq2 +ON subq1.key = subq2.key +ORDER BY subq1.key ASC, subq1.c ASC, subq2.key ASC, subq2.val ASC, subq2.c ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_2 +Table:default@t1 +Keys:key,val + +Operator:GBY_8 +Table:default@t1 +Keys:key + +1 1 1 11 1 +2 1 2 12 1 +3 1 3 13 1 +7 1 7 17 1 +8 2 8 18 1 +8 2 8 28 1 +PREHOOK: query: -- constants from sub-queries should work fine +SELECT key, constant, val, count(1) from +(SELECT key, 1 as constant, val from T1) subq1 +group by key, constant, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_3 +Table:default@t1 +Keys:key,val + +1 1 11 1 +2 1 12 1 +3 1 13 1 +7 1 17 1 +8 1 18 1 +8 1 28 1 +PREHOOK: query: -- multiple levels of constants from sub-queries should work fine +SELECT key, constant3, val, count(1) FROM +( + SELECT key, constant AS constant2, val, 2 AS constant3 + FROM + ( + SELECT key, 1 AS constant, val + FROM T1 + ) subq +) subq2 +GROUP BY key, constant3, val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +Operator:GBY_4 +Table:default@t1 +Keys:key,val + +1 2 11 1 +2 2 12 1 +3 2 13 1 +7 2 17 1 +8 2 18 1 +8 2 28 1 +PREHOOK: query: -- work with insert overwrite +FROM T1 +INSERT OVERWRITE TABLE T2 SELECT key, count(1) GROUP BY key, 1 +INSERT OVERWRITE TABLE T3 SELECT key, sum(val) GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Output: default@t2 +PREHOOK: Output: default@t3 +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +[Error 30017]: Skipping stats aggregation by error org.apache.hadoop.hive.ql.metadata.HiveException: [Error 30015]: Stats aggregator of type counter cannot be connected to +Operator:GBY_2 
+Table:default@t1 +Keys:key + +Operator:GBY_8 +Table:default@t1 +Keys:key + +PREHOOK: query: -- simple joins +SELECT * +FROM T1 JOIN T2 +ON T1.key = t2.key +ORDER BY T1.key ASC, T1.val ASC +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_6 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:key + +1 11 1 1 +2 12 2 1 +3 13 3 1 +7 17 7 1 +8 18 8 2 +8 28 8 2 +PREHOOK: query: SELECT * +FROM T1 JOIN T2 +ON T1.key = T2.key AND T1.val = T2.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_6 +Table:default@t2 +Keys:key,val +Table:default@t1 +Keys:key,val + +PREHOOK: query: -- map join +SELECT /*+ MAPJOIN(a) */ * +FROM T1 a JOIN T2 b +ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_6 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:key + +1 11 1 1 +2 12 2 1 +3 13 3 1 +7 17 7 1 +8 18 8 2 +8 28 8 2 +PREHOOK: query: -- with constant in join condition +SELECT * +FROM T1 JOIN T2 +ON T1.key = T2.key AND T1.val = 3 and T2.val = 3 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_6 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:key + +PREHOOK: query: -- subqueries +SELECT * +FROM +( + SELECT val FROM T1 WHERE key = 5 +) subq1 +JOIN +( + SELECT val FROM T2 WHERE key = 6 +) subq2 +ON subq1.val = subq2.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_10 +Table:default@t2 +Keys:val +Table:default@t1 +Keys:val + +PREHOOK: query: SELECT * +FROM +( + SELECT val FROM T1 WHERE key = 5 +) subq1 +JOIN +T2 +ON subq1.val = T2.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_8 +Table:default@t2 +Keys:val +Table:default@t1 +Keys:val + +PREHOOK: query: -- with column aliases in subqueries +SELECT * +FROM +( + SELECT val as v FROM T1 WHERE key = 5 +) subq1 +JOIN +( + SELECT val FROM T2 WHERE key = 6 +) subq2 +ON subq1.v = subq2.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_10 +Table:default@t2 +Keys:val +Table:default@t1 +Keys:val + +PREHOOK: query: -- with constants in subqueries +SELECT * +FROM +( + SELECT key, val FROM T1 +) subq1 +JOIN +( + SELECT key, 'teststring' as val FROM T2 +) subq2 +ON subq1.val = subq2.val AND subq1.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_8 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:val,key + +PREHOOK: query: -- multiple levels of constants in subqueries +SELECT * +FROM +( + SELECT key, val from + ( + SELECT key, 'teststring' as val from T1 + ) subq1 +) subq2 +JOIN +( + SELECT key, val FROM T2 +) subq3 +ON subq3.val = subq2.val AND subq3.key = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_9 +Table:default@t2 +Keys:val,key +Table:default@t1 +Keys:key + +PREHOOK: query: -- no mapping on functions +SELECT * +FROM +( + SELECT key, val from T1 +) subq1 +JOIN +( + SELECT key, val FROM T2 +) subq2 +ON subq1.val = subq2.val AND subq1.key + 1 = subq2.key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: -- join followed by group by +SELECT subq1.val, COUNT(*) +FROM +( + SELECT key, val FROM T1 +) subq1 +JOIN +( + SELECT key, 'teststring' as val FROM T2 +) subq2 +ON subq1.val = subq2.val AND subq1.key = subq2.key +GROUP BY subq1.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +Operator:JOIN_8 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:val,key + +PREHOOK: query: -- join followed by union +SELECT * +FROM +( + SELECT subq1.val, COUNT(*) + FROM + ( + SELECT key, val FROM T1 + ) subq1 + JOIN + ( + SELECT key, 'teststring' as val FROM T2 + ) subq2 + ON subq1.val = subq2.val AND subq1.key = subq2.key + GROUP BY subq1.val + UNION ALL + SELECT val, COUNT(*) + FROM T3 + GROUP BY val +) subq4 +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +Operator:GBY_16 +Table:default@t3 +Keys:val + +Operator:JOIN_8 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:val,key + +11.0 1 +12.0 1 +13.0 1 +17.0 1 +46.0 1 +PREHOOK: query: -- join followed by join +SELECT * +FROM +( + SELECT subq1.val as val, COUNT(*) + FROM + ( + SELECT key, val FROM T1 + ) subq1 + JOIN + ( + SELECT key, 'teststring' as val FROM T2 + ) subq2 + ON subq1.val = subq2.val AND subq1.key = subq2.key + GROUP by subq1.val +) T4 +JOIN T3 +ON T3.val = T4.val +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +Operator:JOIN_8 +Table:default@t2 +Keys:key +Table:default@t1 +Keys:val,key +
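
The table_access_keys_stats cases above pin down how the TableAccessAnalyzer maps GROUP BY and JOIN keys back to base-table columns: constants in the key list are simply dropped from the mapping, while wrapping a column in any function suppresses the mapping for that operator entirely. A minimal restatement, using two queries taken verbatim from the output above:

-- Constants are ignored: the analyzer still reports
--   Operator:GBY_2 / Table:default@t1 / Keys:key,val
SELECT key, 1, val, count(1) FROM T1 GROUP BY key, 1, val;

-- A function over a column breaks the column-to-key mapping, so no
-- Operator/Table/Keys block is emitted for this query at all.
SELECT key, key + 1, count(1) FROM T1 GROUP BY key, key + 1;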
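
Likewise, the smb_mapjoin and auto_smb_mapjoin cases earlier in this diff all exercise one eligibility rule: with both sides bucketed and sorted identically, a hinted map join can be converted to a sort-merge bucket join only when the first N sort columns (N = number of join columns) contain the join columns, in any order. A minimal sketch of that setup follows; the SET lines are an assumption about what such .q tests typically configure, since the .q sources themselves are not part of this diff:

-- Assumed test configuration (not shown in this diff):
SET hive.optimize.bucketmapjoin = true;
SET hive.optimize.bucketmapjoin.sortedmerge = true;
SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat;

-- Tables as created above: CLUSTERED BY (key)
-- SORTED BY (key ASC, key2 ASC, value ASC) INTO 16 BUCKETS.

-- Eligible: (key, key2) are the first two sort columns, even written key2-first.
SELECT /*+mapjoin(b)*/ *
FROM test_table1 a JOIN test_table2 b
ON a.key2 = b.key2 AND a.key = b.key;

-- Not eligible: (key, value) skips key2, so the first two sort columns
-- do not contain all the join columns and the join stays a shuffle join.
SELECT /*+mapjoin(b)*/ *
FROM test_table1 a JOIN test_table2 b
ON a.key = b.key AND a.value = b.value;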