diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index acca68dd4163137b3a8817c9e32672801ab77103..4f2de12040fd18bcc4790ca3a0279cc5d6cc1905 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -654,7 +654,16 @@ spark.query.files=add_part_multiple.q, \
   groupby1.q, \
   groupby10.q, \
   groupby11.q, \
+  groupby1_map.q, \
+  groupby1_map_nomap.q, \
+  groupby1_map_skew.q, \
+  groupby1_noskew.q, \
   groupby2.q, \
+  groupby2_map.q, \
+  groupby2_map_multi_distinct.q, \
+  groupby2_map_skew.q, \
+  groupby2_noskew.q, \
+  groupby2_noskew_multi_distinct.q, \
   groupby3.q, \
   groupby3_map.q, \
   groupby3_map_multi_distinct.q, \
@@ -662,6 +671,17 @@ spark.query.files=add_part_multiple.q, \
   groupby3_noskew.q, \
   groupby3_noskew_multi_distinct.q, \
   groupby4.q, \
+  groupby4_map.q, \
+  groupby4_map_skew.q, \
+  groupby4_noskew.q, \
+  groupby5.q, \
+  groupby5_map.q, \
+  groupby5_map_skew.q, \
+  groupby5_noskew.q, \
+  groupby6.q, \
+  groupby6_map.q, \
+  groupby6_map_skew.q, \
+  groupby6_noskew.q, \
   groupby7.q, \
   groupby7_map.q, \
   groupby7_map_multi_single_reducer.q, \
@@ -677,6 +697,7 @@ spark.query.files=add_part_multiple.q, \
   groupby_complex_types.q, \
   groupby_complex_types_multi_single_reducer.q, \
   groupby_cube1.q, \
+  groupby_grouping_id2.q, \
   groupby_map_ppr.q, \
   groupby_map_ppr_multi_distinct.q, \
   groupby_multi_insert_common_distinct.q, \
@@ -685,8 +706,11 @@ spark.query.files=add_part_multiple.q, \
   groupby_multi_single_reducer3.q, \
   groupby_position.q, \
   groupby_ppr.q, \
+  groupby_ppr_multi_distinct.q, \
+  groupby_resolution.q, \
   groupby_rollup1.q, \
   groupby_sort_1_23.q, \
+  groupby_sort_skew_1.q, \
   groupby_sort_skew_1_23.q, \
   having.q, \
   identity_project_remove_skip.q, \
@@ -826,6 +850,10 @@ spark.query.files=add_part_multiple.q, \
   multi_join_union.q, \
   multi_join_union_src.q, \
   multigroupby_singlemr.q, \
+  nullgroup.q, \
+  nullgroup2.q, \
+  nullgroup4.q, \
+  nullgroup4_multi_distinct.q, \
   optimize_nullscan.q, \
   order.q, \
   order2.q, \
@@ -953,6 +981,7 @@ spark.query.files=add_part_multiple.q, \
   subquery_multiinsert.q, \
   table_access_keys_stats.q, \
   temp_table.q, \
+  temp_table_gb1.q, \
   temp_table_join1.q, \
   tez_join_tests.q, \
   tez_joins_explain.q, \
@@ -967,8 +996,12 @@ spark.query.files=add_part_multiple.q, \
   transform2.q, \
   transform_ppr1.q, \
   transform_ppr2.q, \
+  udaf_collect_set.q, \
   udf_example_add.q, \
   udf_in_file.q, \
+  udf_max.q, \
+  udf_min.q, \
+  udf_percentile.q, \
   union.q, \
   union10.q, \
   union11.q, \
@@ -1036,8 +1069,8 @@ spark.query.files=add_part_multiple.q, \
   union_remove_9.q, \
   union_script.q, \
   union_top_level.q, \
-  uniquejoin.q, \
   union_view.q, \
+  uniquejoin.q, \
   varchar_join1.q, \
   vector_between_in.q, \
   vector_cast_constant.q, \
@@ -1132,4 +1165,8 @@ miniSparkOnYarn.query.files=auto_sortmerge_join_16.q,\
   truncate_column_buckets.q,\
   uber_reduce.q
 
-spark.query.negative.files=groupby2_map_skew_multi_distinct.q
+spark.query.negative.files=groupby2_map_skew_multi_distinct.q,\
+  groupby2_multi_distinct.q,\
+  groupby3_map_skew_multi_distinct.q,\
+  groupby3_multi_distinct.q,\
+  groupby_grouping_sets7.q
diff --git ql/src/test/results/clientnegative/spark/groupby2_multi_distinct.q.out ql/src/test/results/clientnegative/spark/groupby2_multi_distinct.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..b219fe60cd335d0fee00cf499f8c6436a1ff2c71
--- /dev/null
+++ ql/src/test/results/clientnegative/spark/groupby2_multi_distinct.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g2
+FAILED: SemanticException [Error 10022]: DISTINCT on different columns not supported with skew in data
diff --git ql/src/test/results/clientnegative/spark/groupby3_map_skew_multi_distinct.q.out ql/src/test/results/clientnegative/spark/groupby3_map_skew_multi_distinct.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..3c5506b3132d7910d7ef483ee9badabce5cca3c5
--- /dev/null
+++ ql/src/test/results/clientnegative/spark/groupby3_map_skew_multi_distinct.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+FAILED: SemanticException [Error 10022]: DISTINCT on different columns not supported with skew in data
diff --git ql/src/test/results/clientnegative/spark/groupby3_multi_distinct.q.out ql/src/test/results/clientnegative/spark/groupby3_multi_distinct.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..3c5506b3132d7910d7ef483ee9badabce5cca3c5
--- /dev/null
+++ ql/src/test/results/clientnegative/spark/groupby3_multi_distinct.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(c1 DOUBLE, c2 DOUBLE, c3 DOUBLE, c4 DOUBLE, c5 DOUBLE, c6 DOUBLE, c7 DOUBLE, c8 DOUBLE, c9 DOUBLE, c10 DOUBLE, c11 DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+FAILED: SemanticException [Error 10022]: DISTINCT on different columns not supported with skew in data
diff --git ql/src/test/results/clientnegative/spark/groupby_grouping_sets7.q.out ql/src/test/results/clientnegative/spark/groupby_grouping_sets7.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..226de5ab1addbd9c9f620e329e0dc061eeb1c4ea
--- /dev/null
+++ ql/src/test/results/clientnegative/spark/groupby_grouping_sets7.q.out
@@ -0,0 +1,9 @@
+PREHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@T1
+POSTHOOK: query: CREATE TABLE T1(a STRING, b STRING, c STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@T1
+FAILED: SemanticException [Error 10225]: An additional MR job is introduced since the number of rows created per input row due to grouping sets is more than hive.new.job.grouping.set.cardinality. There is no need to handle skew separately. set hive.groupby.skewindata to false. The number of rows per input row due to grouping sets is 4
diff --git ql/src/test/results/clientpositive/spark/groupby1_map.q.out ql/src/test/results/clientpositive/spark/groupby1_map.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..d240c9841761e59f281ae14617004d7676338232
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby1_map.q.out
@@ -0,0 +1,412 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: double)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+133 133.0
+134 268.0
+136 136.0
+137 274.0
+138 552.0
+143 143.0
+145 145.0
+146 292.0
+149 298.0
+15 30.0
+150 150.0
+152 304.0
+153 153.0
+155 155.0
+156 156.0
+157 157.0
+158 158.0
+160 160.0
+162 162.0
+163 163.0
+164 328.0
+165 330.0
+166 166.0
+167 501.0
+168 168.0
+169 676.0
+17 17.0
+170 170.0
+172 344.0
+174 348.0
+175 350.0
+176 352.0
+177 177.0
+178 178.0
+179 358.0
+18 36.0
+180 180.0
+181 181.0
+183 183.0
+186 186.0
+187 561.0
+189 189.0
+19 19.0
+190 190.0
+191 382.0
+192 192.0
+193 579.0
+194 194.0
+195 390.0
+196 196.0
+197 394.0
+199 597.0
+2 2.0
+20 20.0
+200 400.0
+201 201.0
+202 202.0
+203 406.0
+205 410.0
+207 414.0
+208 624.0
+209 418.0
+213 426.0
+214 214.0
+216 432.0
+217 434.0
+218 218.0
+219 438.0
+221 442.0
+222 222.0
+223 446.0
+224 448.0
+226 226.0
+228 228.0
+229 458.0
+230 1150.0
+233 466.0
+235 235.0
+237 474.0
+238 476.0
+239 478.0
+24 48.0
+241 241.0
+242 484.0
+244 244.0
+247 247.0
+248 248.0
+249 249.0
+252 252.0
+255 510.0
+256 512.0
+257 257.0
+258 258.0
+26 52.0
+260 260.0
+262 262.0
+263 263.0
+265 530.0
+266 266.0
+27 27.0
+272 544.0
+273 819.0
+274 274.0
+275 275.0
+277 1108.0
+278 556.0
+28 28.0
+280 560.0
+281 562.0
+282 564.0
+283 283.0
+284 284.0
+285 285.0
+286 286.0
+287 287.0
+288 576.0
+289 289.0
+291 291.0
+292 292.0
+296 296.0
+298 894.0
+30 30.0
+302 302.0
+305 305.0
+306 306.0
+307 614.0
+308 308.0
+309 618.0
+310 310.0
+311 933.0
+315 315.0
+316 948.0
+317 634.0
+318 954.0
+321 642.0
+322 644.0
+323 323.0
+325 650.0
+327 981.0
+33 33.0
+331 662.0
+332 332.0
+333 666.0
+335 335.0
+336 336.0
+338 338.0
+339 339.0
+34 34.0
+341 341.0
+342 684.0
+344 688.0
+345 345.0
+348 1740.0
+35 105.0
+351 351.0
+353 706.0
+356 356.0
+360 360.0
+362 362.0
+364 364.0
+365 365.0
+366 366.0
+367 734.0
+368 368.0
+369 1107.0
+37 74.0
+373 373.0
+374 374.0
+375 375.0
+377 377.0
+378 378.0
+379 379.0
+382 764.0
+384 1152.0
+386 386.0
+389 389.0
+392 392.0
+393 393.0
+394 394.0
+395 790.0
+396 1188.0
+397 794.0
+399 798.0
+4 4.0
+400 400.0
+401 2005.0
+402 402.0
+403 1209.0
+404 808.0
+406 1624.0
+407 407.0
+409 1227.0
+41 41.0
+411 411.0
+413 826.0
+414 828.0
+417 1251.0
+418 418.0
+419 419.0
+42 84.0
+421 421.0
+424 848.0
+427 427.0
+429 858.0
+43 43.0
+430 1290.0
+431 1293.0
+432 432.0
+435 435.0
+436 436.0
+437 437.0
+438 1314.0
+439 878.0
+44 44.0
+443 443.0
+444 444.0
+446 446.0
+448 448.0
+449 449.0
+452 452.0
+453 453.0
+454 1362.0
+455 455.0
+457 457.0
+458 916.0
+459 918.0
+460 460.0
+462 924.0
+463 926.0
+466 1398.0
+467 467.0
+468 1872.0
+469 2345.0
+47 47.0
+470 470.0
+472 472.0
+475 475.0
+477 477.0
+478 956.0
+479 479.0
+480 1440.0
+481 481.0
+482 482.0
+483 483.0
+484 484.0
+485 485.0
+487 487.0
+489 1956.0
+490 490.0
+491 491.0
+492 984.0
+493 493.0
+494 494.0
+495 495.0
+496 496.0
+497 497.0
+498 1494.0
+5 15.0
+51 102.0
+53 53.0
+54 54.0
+57 57.0
+58 116.0
+64 64.0
+65 65.0
+66 66.0
+67 134.0
+69 69.0
+70 210.0
+72 144.0
+74 74.0
+76 152.0
+77 77.0
+78 78.0
+8 8.0
+80 80.0
+82 82.0
+83 166.0
+84 168.0
+85 85.0
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
diff --git ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..8fd9661b496e0610cbae7cebf6cdd7a31d210d71
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby1_map_nomap.q.out
@@ -0,0 +1,408 @@
+PREHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 2)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: double)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+273 819.0
+275 275.0
+419 419.0
+118 236.0
+202 202.0
+282 564.0
+82 82.0
+116 116.0
+345 345.0
+332 332.0
+19 19.0
+42 84.0
+459 918.0
+190 190.0
+257 257.0
+134 268.0
+165 330.0
+138 552.0
+222 222.0
+163 163.0
+219 438.0
+411 411.0
+305 305.0
+479 479.0
+28 28.0
+318 954.0
+244 244.0
+208 624.0
+136 136.0
+24 48.0
+239 478.0
+84 168.0
+11 11.0
+367 734.0
+288 576.0
+150 150.0
+402 402.0
+466 1398.0
+224 448.0
+237 474.0
+105 105.0
+484 484.0
+20 20.0
+400 400.0
+97 194.0
+280 560.0
+255 510.0
+103 206.0
+242 484.0
+323 323.0
+309 618.0
+365 365.0
+178 178.0
+26 52.0
+404 808.0
+196 196.0
+448 448.0
+462 924.0
+389 389.0
+338 338.0
+167 501.0
+493 493.0
+33 33.0
+152 304.0
+477 477.0
+431 1293.0
+316 948.0
+125 250.0
+444 444.0
+457 457.0
+446 446.0
+310 310.0
+129 258.0
+183 183.0
+392 392.0
+277 1108.0
+4 4.0
+80 80.0
+228 228.0
+145 145.0
+356 356.0
+284 284.0
+455 455.0
+53 53.0
+149 298.0
+424 848.0
+37 74.0
+286 286.0
+327 981.0
+170 170.0
+187 561.0
+86 86.0
+291 291.0
+233 466.0
+439 878.0
+266 266.0
+2 2.0
+396 1188.0
+336 336.0
+226 226.0
+176 352.0
+66 66.0
+497 497.0
+172 344.0
+491 491.0
+44 44.0
+200 400.0
+235 235.0
+77 77.0
+260 260.0
+406 1624.0
+460 460.0
+495 495.0
+143 143.0
+189 189.0
+453 453.0
+64 64.0
+158 158.0
+341 341.0
+475 475.0
+8 8.0
+394 394.0
+57 57.0
+169 676.0
+15 30.0
+35 105.0
+174 348.0
+325 650.0
+0 0.0
+248 248.0
+468 1872.0
+435 435.0
+51 102.0
+321 642.0
+413 826.0
+369 1107.0
+480 1440.0
+156 156.0
+192 192.0
+213 426.0
+374 374.0
+437 437.0
+17 17.0
+181 181.0
+482 482.0
+307 614.0
+194 194.0
+217 434.0
+95 190.0
+114 114.0
+262 262.0
+378 378.0
+417 1251.0
+281 562.0
+180 180.0
+467 467.0
+201 201.0
+432 432.0
+238 476.0
+96 96.0
+386 386.0
+283 283.0
+168 168.0
+209 418.0
+463 926.0
+377 377.0
+317 634.0
+252 252.0
+104 208.0
+373 373.0
+131 131.0
+494 494.0
+230 1150.0
+83 166.0
+191 382.0
+41 41.0
+193 579.0
+436 436.0
+496 496.0
+166 166.0
+229 458.0
+298 894.0
+133 133.0
+333 666.0
+65 65.0
+292 292.0
+364 364.0
+472 472.0
+274 274.0
+47 47.0
+401 2005.0
+67 134.0
+5 15.0
+18 36.0
+27 27.0
+344 688.0
+409 1227.0
+256 512.0
+85 85.0
+72 144.0
+54 54.0
+393 393.0
+160 160.0
+438 1314.0
+263 263.0
+351 351.0
+207 414.0
+449 449.0
+111 111.0
+128 384.0
+289 289.0
+399 798.0
+489 1956.0
+205 410.0
+177 177.0
+119 357.0
+331 662.0
+348 1740.0
+478 956.0
+76 152.0
+458 916.0
+382 764.0
+157 157.0
+315 315.0
+469 2345.0
+302 302.0
+395 790.0
+384 1152.0
+162 162.0
+113 226.0
+98 196.0
+221 442.0
+203 406.0
+199 597.0
+454 1362.0
+218 218.0
+241 241.0
+272 544.0
+120 240.0
+403 1209.0
+366 366.0
+249 249.0
+421 421.0
+214 214.0
+92 92.0
+487 487.0
+258 258.0
+429 858.0
+265 530.0
+175 350.0
+34 34.0
+368 368.0
+69 69.0
+414 828.0
+30 30.0
+492 984.0
+9 9.0
+296 296.0
+311 933.0
+247 247.0
+164 328.0
+306 306.0
+153 153.0
+339 339.0
+322 644.0
+10 10.0
+430 1290.0
+155 155.0
+452 452.0
+179 358.0
+485 485.0
+490 490.0
+443 443.0
+379 379.0
+186 186.0
+100 200.0
+137 274.0
+483 483.0
+90 270.0
+481 481.0
+287 287.0
+146 292.0
+216 432.0
+342 684.0
+470 470.0
+362 362.0
+375 375.0
+407 407.0
+397 794.0
+58 116.0
+498 1494.0
+87 87.0
+195 390.0
+197 394.0
+78 78.0
+278 556.0
+12 24.0
+335 335.0
+360 360.0
+308 308.0
+223 446.0
+418 418.0
+43 43.0
+353 706.0
+74 74.0
+427 427.0
+70 210.0
+285 285.0
+126 126.0
diff --git ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..32355d491bb089ed6a286332e3fd1cda1bd95c44
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby1_map_skew.q.out
@@ -0,0 +1,427 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+        Reducer 3 <- Reducer 2 (GROUP, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(_col1)
+                      keys: _col0 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: rand() (type: double)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col1 (type: double)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: partials
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: double)
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: final
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+133 133.0
+134 268.0
+136 136.0
+137 274.0
+138 552.0
+143 143.0
+145 145.0
+146 292.0
+149 298.0
+15 30.0
+150 150.0
+152 304.0
+153 153.0
+155 155.0
+156 156.0
+157 157.0
+158 158.0
+160 160.0
+162 162.0
+163 163.0
+164 328.0
+165 330.0
+166 166.0
+167 501.0
+168 168.0
+169 676.0
+17 17.0
+170 170.0
+172 344.0
+174 348.0
+175 350.0
+176 352.0
+177 177.0
+178 178.0
+179 358.0
+18 36.0
+180 180.0
+181 181.0
+183 183.0
+186 186.0
+187 561.0
+189 189.0
+19 19.0
+190 190.0
+191 382.0
+192 192.0
+193 579.0
+194 194.0
+195 390.0
+196 196.0
+197 394.0
+199 597.0
+2 2.0
+20 20.0
+200 400.0
+201 201.0
+202 202.0
+203 406.0
+205 410.0
+207 414.0
+208 624.0
+209 418.0
+213 426.0
+214 214.0
+216 432.0
+217 434.0
+218 218.0
+219 438.0
+221 442.0
+222 222.0
+223 446.0
+224 448.0
+226 226.0
+228 228.0
+229 458.0
+230 1150.0
+233 466.0
+235 235.0
+237 474.0
+238 476.0
+239 478.0
+24 48.0
+241 241.0
+242 484.0
+244 244.0
+247 247.0
+248 248.0
+249 249.0
+252 252.0
+255 510.0
+256 512.0
+257 257.0
+258 258.0
+26 52.0
+260 260.0
+262 262.0
+263 263.0
+265 530.0
+266 266.0
+27 27.0
+272 544.0
+273 819.0
+274 274.0
+275 275.0
+277 1108.0
+278 556.0
+28 28.0
+280 560.0
+281 562.0
+282 564.0
+283 283.0
+284 284.0
+285 285.0
+286 286.0
+287 287.0
+288 576.0
+289 289.0
+291 291.0
+292 292.0
+296 296.0
+298 894.0
+30 30.0
+302 302.0
+305 305.0
+306 306.0
+307 614.0
+308 308.0
+309 618.0
+310 310.0
+311 933.0
+315 315.0
+316 948.0
+317 634.0
+318 954.0
+321 642.0
+322 644.0
+323 323.0
+325 650.0
+327 981.0
+33 33.0
+331 662.0
+332 332.0
+333 666.0
+335 335.0
+336 336.0
+338 338.0
+339 339.0
+34 34.0
+341 341.0
+342 684.0
+344 688.0
+345 345.0
+348 1740.0
+35 105.0
+351 351.0
+353 706.0
+356 356.0
+360 360.0
+362 362.0
+364 364.0
+365 365.0
+366 366.0
+367 734.0
+368 368.0
+369 1107.0
+37 74.0
+373 373.0
+374 374.0
+375 375.0
+377 377.0
+378 378.0
+379 379.0
+382 764.0
+384 1152.0
+386 386.0
+389 389.0
+392 392.0
+393 393.0
+394 394.0
+395 790.0
+396 1188.0
+397 794.0
+399 798.0
+4 4.0
+400 400.0
+401 2005.0
+402 402.0
+403 1209.0
+404 808.0
+406 1624.0
+407 407.0
+409 1227.0
+41 41.0
+411 411.0
+413 826.0
+414 828.0
+417 1251.0
+418 418.0
+419 419.0
+42 84.0
+421 421.0
+424 848.0
+427 427.0
+429 858.0
+43 43.0
+430 1290.0
+431 1293.0
+432 432.0
+435 435.0
+436 436.0
+437 437.0
+438 1314.0
+439 878.0
+44 44.0
+443 443.0
+444 444.0
+446 446.0
+448 448.0
+449 449.0
+452 452.0
+453 453.0
+454 1362.0
+455 455.0
+457 457.0
+458 916.0
+459 918.0
+460 460.0
+462 924.0
+463 926.0
+466 1398.0
+467 467.0
+468 1872.0
+469 2345.0
+47 47.0
+470 470.0
+472 472.0
+475 475.0
+477 477.0
+478 956.0
+479 479.0
+480 1440.0
+481 481.0
+482 482.0
+483 483.0
+484 484.0
+485 485.0
+487 487.0
+489 1956.0
+490 490.0
+491 491.0
+492 984.0
+493 493.0
+494 494.0
+495 495.0
+496 496.0
+497 497.0
+498 1494.0
+5 15.0
+51 102.0
+53 53.0
+54 54.0
+57 57.0
+58 116.0
+64 64.0
+65 65.0
+66 66.0
+67 134.0
+69 69.0
+70 210.0
+72 144.0
+74 74.0
+76 152.0
+77 77.0
+78 78.0
+8 8.0
+80 80.0
+82 82.0
+83 166.0
+84 168.0
+85 85.0
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
diff --git ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..4dfe32c5f537bd8e5cfe775630f861a9b157faca
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby1_noskew.q.out
@@ -0,0 +1,406 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g1(key INT, value DOUBLE) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g1
+PREHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: key (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col1 (type: string)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: UDFToInteger(_col0) (type: int), _col1 (type: double)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest_g1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest_g1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g1
+POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest_g1 SELECT src.key, sum(substr(src.value,5)) GROUP BY src.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g1
+POSTHOOK: Lineage: dest_g1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: SELECT dest_g1.* FROM dest_g1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest_g1.* FROM dest_g1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g1
+#### A masked pattern was here ####
+0 0.0
+10 10.0
+100 200.0
+103 206.0
+104 208.0
+105 105.0
+11 11.0
+111 111.0
+113 226.0
+114 114.0
+116 116.0
+118 236.0
+119 357.0
+12 24.0
+120 240.0
+125 250.0
+126 126.0
+128 384.0
+129 258.0
+131 131.0
+133 133.0
+134 268.0
+136 136.0
+137 274.0
+138 552.0
+143 143.0
+145 145.0
+146 292.0
+149 298.0
+15 30.0
+150 150.0
+152 304.0
+153 153.0
+155 155.0
+156 156.0
+157 157.0
+158 158.0
+160 160.0
+162 162.0
+163 163.0
+164 328.0
+165 330.0
+166 166.0
+167 501.0
+168 168.0
+169 676.0
+17 17.0
+170 170.0
+172 344.0
+174 348.0
+175 350.0
+176 352.0
+177 177.0
+178 178.0
+179 358.0
+18 36.0
+180 180.0
+181 181.0
+183 183.0
+186 186.0
+187 561.0
+189 189.0
+19 19.0
+190 190.0
+191 382.0
+192 192.0
+193 579.0
+194 194.0
+195 390.0
+196 196.0
+197 394.0
+199 597.0
+2 2.0
+20 20.0
+200 400.0
+201 201.0
+202 202.0
+203 406.0
+205 410.0
+207 414.0
+208 624.0
+209 418.0
+213 426.0
+214 214.0
+216 432.0
+217 434.0
+218 218.0
+219 438.0
+221 442.0
+222 222.0
+223 446.0
+224 448.0
+226 226.0
+228 228.0
+229 458.0
+230 1150.0
+233 466.0
+235 235.0
+237 474.0
+238 476.0
+239 478.0
+24 48.0
+241 241.0
+242 484.0
+244 244.0
+247 247.0
+248 248.0
+249 249.0
+252 252.0
+255 510.0
+256 512.0
+257 257.0
+258 258.0
+26 52.0
+260 260.0
+262 262.0
+263 263.0
+265 530.0
+266 266.0
+27 27.0
+272 544.0
+273 819.0
+274 274.0
+275 275.0
+277 1108.0
+278 556.0
+28 28.0
+280 560.0
+281 562.0
+282 564.0
+283 283.0
+284 284.0
+285 285.0
+286 286.0
+287 287.0
+288 576.0
+289 289.0
+291 291.0
+292 292.0
+296 296.0
+298 894.0
+30 30.0
+302 302.0
+305 305.0
+306 306.0
+307 614.0
+308 308.0
+309 618.0
+310 310.0
+311 933.0
+315 315.0
+316 948.0
+317 634.0
+318 954.0
+321 642.0
+322 644.0
+323 323.0
+325 650.0
+327 981.0
+33 33.0
+331 662.0
+332 332.0
+333 666.0
+335 335.0
+336 336.0
+338 338.0
+339 339.0
+34 34.0
+341 341.0
+342 684.0
+344 688.0
+345 345.0
+348 1740.0
+35 105.0
+351 351.0
+353 706.0
+356 356.0
+360 360.0
+362 362.0
+364 364.0
+365 365.0
+366 366.0
+367 734.0
+368 368.0
+369 1107.0
+37 74.0
+373 373.0
+374 374.0
+375 375.0
+377 377.0
+378 378.0
+379 379.0
+382 764.0
+384 1152.0
+386 386.0
+389 389.0
+392 392.0
+393 393.0
+394 394.0
+395 790.0
+396 1188.0
+397 794.0
+399 798.0
+4 4.0
+400 400.0
+401 2005.0
+402 402.0
+403 1209.0
+404 808.0
+406 1624.0
+407 407.0
+409 1227.0
+41 41.0
+411 411.0
+413 826.0
+414 828.0
+417 1251.0
+418 418.0
+419 419.0
+42 84.0
+421 421.0
+424 848.0
+427 427.0
+429 858.0
+43 43.0
+430 1290.0
+431 1293.0
+432 432.0
+435 435.0
+436 436.0
+437 437.0
+438 1314.0
+439 878.0
+44 44.0
+443 443.0
+444 444.0
+446 446.0
+448 448.0
+449 449.0
+452 452.0
+453 453.0
+454 1362.0
+455 455.0
+457 457.0
+458 916.0
+459 918.0
+460 460.0
+462 924.0
+463 926.0
+466 1398.0
+467 467.0
+468 1872.0
+469 2345.0
+47 47.0
+470 470.0
+472 472.0
+475 475.0
+477 477.0
+478 956.0
+479 479.0
+480 1440.0
+481 481.0
+482 482.0
+483 483.0
+484 484.0
+485 485.0
+487 487.0
+489 1956.0
+490 490.0
+491 491.0
+492 984.0
+493 493.0
+494 494.0
+495 495.0
+496 496.0
+497 497.0
+498 1494.0
+5 15.0
+51 102.0
+53 53.0
+54 54.0
+57 57.0
+58 116.0
+64 64.0
+65 65.0
+66 66.0
+67 134.0
+69 69.0
+70 210.0
+72 144.0
+74 74.0
+76 152.0
+77 77.0
+78 78.0
+8 8.0
+80 80.0
+82 82.0
+83 166.0
+84 168.0
+85 85.0
+86 86.0
+87 87.0
+9 9.0
+90 270.0
+92 92.0
+95 190.0
+96 96.0
+97 194.0
+98 196.0
diff --git ql/src/test/results/clientpositive/spark/groupby2_map.q.out ql/src/test/results/clientpositive/spark/groupby2_map.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..40da0f37cc7846d80bea49e6c58ee4513135856b
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby2_map.q.out
@@ -0,0 +1,118 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col1), sum(_col1)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: double)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 1 00.0
+1 71 116414.0
+2 69 225571.0
+3 62 332004.0
+4 74 452763.0
+5 6 5397.0
+6 5 6398.0
+7 6 7735.0
+8 8 8762.0
+9 7 91047.0
diff --git ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..5b9c8e40eaf4cb3167a5529203825c6d859be073
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby2_map_multi_distinct.q.out
@@ -0,0 +1,232 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col1), sum(_col1), sum(DISTINCT _col1), count(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: double), _col5 (type: bigint)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 1 00.0 0 3
+1 71 116414.0 10044 115
+2 69 225571.0 15780 111
+3 62 332004.0 20119 99
+4 74 452763.0 30965 124
+5 6 5397.0 278 10
+6 5 6398.0 331 6
+7 6 7735.0 447 10
+8 8 8762.0 595 10
+9 7 91047.0 577 12
+PREHOOK: query: -- HIVE-5560 when group by key is used in distinct funtion, invalid result are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-5560 when group by key is used in distinct funtion, invalid result are returned
+
+EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col0), sum(_col1), sum(DISTINCT _col1), count(_col2)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: double), _col5 (type: bigint)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col3)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.key,1,1)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.null, ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c3 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 1 00.0 0 3
+1 1 116414.0 10044 115
+2 1 225571.0 15780 111
+3 1 332004.0 20119 99
+4 1 452763.0 30965 124
+5 1 5397.0 278 10
+6 1 6398.0 331 6
+7 1 7735.0 447 10
+8 1 8762.0 595 10
+9 1 91047.0 577 12
diff --git ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..f4a567ea9b7838bcd9e07e66fc5642ade8e88ee2
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby2_map_skew.q.out
@@ -0,0 +1,129 @@
+PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest1
+POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest1
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+        Reducer 3 <- Reducer 2 (GROUP, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: count(DISTINCT _col1), sum(_col1)
+                      keys: _col0 (type: string), _col1 (type: string)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string), _col1 (type: string)
+                        sort order: ++
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col3 (type: double)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: partials
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: double)
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), sum(VALUE._col1)
+                keys: KEY._col0 (type: string)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest1
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest1
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest1
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest1
+POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest1.* FROM dest1 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest1.* FROM dest1 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest1
+#### A masked pattern was here ####
+0 1 00.0
+1 71 116414.0
+2 69 225571.0
+3 62 332004.0
+4 74 452763.0
+5 6 5397.0
+6 5 6398.0
+7 6 7735.0
+8 8 8762.0
+9 7 91047.0
diff --git ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..13f8c1869b542b02a0e1bad2bb6f387bd0e1f69a
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby2_noskew.q.out
@@ -0,0 +1,111 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string)
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0)
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest_g2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest_g2
+
+  Stage: Stage-2
+    Stats-Aggr Operator
+
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: SELECT dest_g2.* FROM dest_g2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest_g2.* FROM dest_g2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+0 1 00.0
+1 71 116414.0
+2 69 225571.0
+3 62 332004.0
+4 74 452763.0
+5 6 5397.0
+6 5 6398.0
+7 6 7735.0
+8 8 8762.0
+9 7 91047.0
diff --git ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..0613e73279f3446b41eea5b6714b1ac9f0fe1fb3
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/groupby2_noskew_multi_distinct.q.out
@@ -0,0 +1,114 @@
+PREHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: -- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+FROM src
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string), _col1 (type: string)
+                      sort order: ++
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col2 (type: string)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: complete
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: default.dest_g2
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.dest_g2
+
+  Stage: Stage-2
+    Stats-Aggr
Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest_g2 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest_g2 +POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.c3 EXPRESSION [(src)src.null, ] +POSTHOOK: Lineage: dest_g2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest_g2.* FROM dest_g2 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest_g2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest_g2.* FROM dest_g2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest_g2 +#### A masked pattern was here #### +0 1 00.0 0 3 +1 71 116414.0 10044 115 +2 69 225571.0 15780 111 +3 62 332004.0 20119 99 +4 74 452763.0 30965 124 +5 6 5397.0 278 10 +6 5 6398.0 331 6 +7 6 7735.0 447 10 +8 8 8762.0 595 10 +9 7 91047.0 577 12 diff --git ql/src/test/results/clientpositive/spark/groupby4_map.q.out ql/src/test/results/clientpositive/spark/groupby4_map.q.out new file mode 100644 index 0000000000000000000000000000000000000000..39536bb633c9c30573b3c836d4876d3a74869b95 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby4_map.q.out @@ -0,0 +1,93 @@ +PREHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: 
mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +500 diff --git ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out new file mode 100644 index 0000000000000000000000000000000000000000..535e770496febc4c99a9c18dde697daebc5fcc5d --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby4_map_skew.q.out @@ -0,0 +1,93 @@ +PREHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select 
Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT count(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +500 diff --git ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out new file mode 100644 index 0000000000000000000000000000000000000000..18ee5c759fe338d4c2c7f16bd7cc327b3dda49d6 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby4_noskew.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(key, 1, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num 
rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT substr(src.key,1,1) GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git ql/src/test/results/clientpositive/spark/groupby5.q.out ql/src/test/results/clientpositive/spark/groupby5.q.out new file mode 100644 index 0000000000000000000000000000000000000000..b4b0d25321e9389c52e0ed35a1cf73d57e556e3a --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby5.q.out @@ -0,0 +1,429 @@ +PREHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: 
NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +273 819.0 +275 275.0 +419 419.0 +118 236.0 +202 202.0 +282 564.0 +116 116.0 +82 82.0 +345 345.0 +19 19.0 +42 84.0 +190 190.0 +222 222.0 +257 257.0 +332 332.0 +134 268.0 +165 330.0 +138 552.0 +459 918.0 +163 163.0 +219 438.0 +305 305.0 +411 411.0 +479 479.0 +28 28.0 +318 954.0 +244 244.0 +208 624.0 +136 136.0 +24 48.0 +239 478.0 +84 168.0 +11 11.0 +288 576.0 +367 734.0 +150 150.0 +402 402.0 +466 1398.0 +105 105.0 +224 448.0 +237 474.0 +20 20.0 +484 484.0 +491 491.0 +400 400.0 +97 194.0 +280 560.0 +255 510.0 +103 206.0 +242 484.0 +323 323.0 +309 618.0 +365 365.0 +178 178.0 +26 52.0 +404 808.0 +196 196.0 +448 448.0 +462 924.0 +389 389.0 +33 33.0 +167 501.0 +493 493.0 +338 338.0 +152 304.0 +477 477.0 +183 183.0 +316 948.0 +125 250.0 +444 444.0 +424 848.0 +446 446.0 +310 310.0 +129 258.0 +431 1293.0 +392 392.0 +277 1108.0 +4 4.0 +80 80.0 +228 228.0 +145 145.0 +356 356.0 +53 53.0 +455 455.0 +284 284.0 +149 298.0 +457 457.0 +286 286.0 +37 74.0 +2 2.0 +170 170.0 +187 561.0 +327 981.0 +86 86.0 +291 291.0 +233 466.0 +439 878.0 +266 266.0 +336 336.0 +396 1188.0 +226 226.0 +172 344.0 +176 352.0 +44 44.0 +497 497.0 +66 66.0 +200 400.0 +235 235.0 +77 77.0 +260 260.0 +406 1624.0 +460 460.0 +453 453.0 +143 143.0 +189 189.0 +341 341.0 +64 64.0 +495 495.0 
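The groupby5 plan above illustrates Hive's skew-tolerant aggregation on Spark: the map side shuffles on rand() so hot keys are spread across both reducers, Reducer 2 aggregates in mode partial1, and Reducer 3 re-shuffles on the real group key to merge in mode final. A minimal sketch of the settings assumed to produce this two-reducer shape (the actual groupby5.q may set them differently):

set hive.execution.engine=spark;
set hive.map.aggr=false;           -- no map-side hash aggregation appears in this plan
set hive.groupby.skewindata=true;  -- forces the rand()-partitioned first stage

INSERT OVERWRITE TABLE dest1
SELECT src.key, sum(substr(src.value,5))
FROM src
GROUP BY src.key;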
+158 158.0 +8 8.0 +475 475.0 +394 394.0 +169 676.0 +57 57.0 +15 30.0 +35 105.0 +174 348.0 +325 650.0 +0 0.0 +248 248.0 +468 1872.0 +435 435.0 +51 102.0 +321 642.0 +413 826.0 +369 1107.0 +480 1440.0 +156 156.0 +192 192.0 +213 426.0 +374 374.0 +17 17.0 +181 181.0 +482 482.0 +307 614.0 +437 437.0 +194 194.0 +217 434.0 +95 190.0 +262 262.0 +114 114.0 +378 378.0 +417 1251.0 +180 180.0 +281 562.0 +432 432.0 +467 467.0 +201 201.0 +238 476.0 +96 96.0 +386 386.0 +283 283.0 +168 168.0 +209 418.0 +463 926.0 +104 208.0 +317 634.0 +252 252.0 +377 377.0 +436 436.0 +131 131.0 +230 1150.0 +41 41.0 +191 382.0 +373 373.0 +494 494.0 +193 579.0 +83 166.0 +496 496.0 +166 166.0 +229 458.0 +298 894.0 +133 133.0 +333 666.0 +65 65.0 +292 292.0 +364 364.0 +274 274.0 +472 472.0 +67 134.0 +401 2005.0 +47 47.0 +5 15.0 +18 36.0 +27 27.0 +344 688.0 +409 1227.0 +256 512.0 +85 85.0 +54 54.0 +72 144.0 +393 393.0 +160 160.0 +351 351.0 +438 1314.0 +263 263.0 +207 414.0 +449 449.0 +489 1956.0 +111 111.0 +128 384.0 +119 357.0 +289 289.0 +399 798.0 +331 662.0 +205 410.0 +177 177.0 +76 152.0 +348 1740.0 +478 956.0 +458 916.0 +384 1152.0 +157 157.0 +315 315.0 +469 2345.0 +302 302.0 +395 790.0 +382 764.0 +162 162.0 +221 442.0 +249 249.0 +69 69.0 +203 406.0 +199 597.0 +454 1362.0 +113 226.0 +218 218.0 +241 241.0 +272 544.0 +421 421.0 +403 1209.0 +120 240.0 +366 366.0 +98 196.0 +214 214.0 +164 328.0 +92 92.0 +258 258.0 +34 34.0 +265 530.0 +175 350.0 +368 368.0 +414 828.0 +429 858.0 +30 30.0 +492 984.0 +9 9.0 +296 296.0 +311 933.0 +247 247.0 +487 487.0 +306 306.0 +153 153.0 +339 339.0 +322 644.0 +10 10.0 +430 1290.0 +155 155.0 +452 452.0 +179 358.0 +443 443.0 +490 490.0 +485 485.0 +362 362.0 +379 379.0 +186 186.0 +100 200.0 +137 274.0 +483 483.0 +90 270.0 +481 481.0 +287 287.0 +146 292.0 +216 432.0 +342 684.0 +470 470.0 +375 375.0 +407 407.0 +397 794.0 +58 116.0 +498 1494.0 +87 87.0 +195 390.0 +197 394.0 +78 78.0 +278 556.0 +12 24.0 +335 335.0 +360 360.0 +308 308.0 +223 446.0 +418 418.0 +43 43.0 +353 706.0 +70 210.0 +427 427.0 +126 126.0 +285 285.0 +74 74.0 diff --git ql/src/test/results/clientpositive/spark/groupby5_map.q.out ql/src/test/results/clientpositive/spark/groupby5_map.q.out new file mode 100644 index 0000000000000000000000000000000000000000..e77592161dcdbbd586ebbf3af9d518c7b8255ebf --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby5_map.q.out @@ -0,0 +1,95 @@ +PREHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +130091 diff --git ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out new file mode 100644 index 0000000000000000000000000000000000000000..5c5893469c22644e825adf9a676ce96d4312c70c --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby5_map_skew.q.out @@ -0,0 +1,95 @@ +PREHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + mode: hash + 
outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src INSERT OVERWRITE TABLE dest1 SELECT sum(src.key) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +130091 diff --git ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out new file mode 100644 index 0000000000000000000000000000000000000000..5be29fe85f88180be64db9e2d0117138309a05bb --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby5_noskew.q.out @@ -0,0 +1,418 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(key INT, value STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), substr(value, 5) (type: string) + 
outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: UDFToInteger(_col0) (type: int), _col1 (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: INSERT OVERWRITE TABLE dest1 +SELECT src.key, sum(substr(src.value,5)) +FROM src +GROUP BY src.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 0.0 +10 10.0 +100 200.0 +103 206.0 +104 208.0 +105 105.0 +11 11.0 +111 111.0 +113 226.0 +114 114.0 +116 116.0 +118 236.0 +119 357.0 +12 24.0 +120 240.0 +125 250.0 +126 126.0 +128 384.0 +129 258.0 +131 131.0 +133 133.0 +134 268.0 +136 136.0 +137 274.0 +138 552.0 +143 143.0 +145 145.0 +146 292.0 +149 298.0 +15 30.0 +150 150.0 +152 304.0 +153 153.0 +155 155.0 +156 156.0 +157 157.0 +158 158.0 +160 160.0 +162 162.0 +163 163.0 +164 328.0 +165 330.0 +166 166.0 +167 501.0 +168 168.0 +169 676.0 +17 17.0 +170 170.0 +172 344.0 +174 348.0 +175 350.0 +176 352.0 +177 177.0 +178 178.0 +179 358.0 +18 36.0 +180 180.0 +181 181.0 +183 183.0 +186 186.0 +187 561.0 +189 189.0 +19 19.0 +190 190.0 +191 382.0 +192 192.0 +193 579.0 +194 194.0 +195 390.0 +196 196.0 +197 394.0 +199 597.0 +2 2.0 +20 20.0 +200 400.0 +201 201.0 +202 202.0 +203 406.0 +205 410.0 +207 414.0 +208 624.0 +209 418.0 +213 426.0 +214 214.0 +216 432.0 +217 434.0 +218 218.0 +219 438.0 +221 442.0 +222 222.0 +223 446.0 +224 448.0 +226 226.0 +228 228.0 +229 458.0 +230 1150.0 +233 466.0 +235 235.0 +237 474.0 +238 476.0 +239 478.0 +24 48.0 +241 241.0 +242 484.0 +244 244.0 +247 247.0 +248 
248.0 +249 249.0 +252 252.0 +255 510.0 +256 512.0 +257 257.0 +258 258.0 +26 52.0 +260 260.0 +262 262.0 +263 263.0 +265 530.0 +266 266.0 +27 27.0 +272 544.0 +273 819.0 +274 274.0 +275 275.0 +277 1108.0 +278 556.0 +28 28.0 +280 560.0 +281 562.0 +282 564.0 +283 283.0 +284 284.0 +285 285.0 +286 286.0 +287 287.0 +288 576.0 +289 289.0 +291 291.0 +292 292.0 +296 296.0 +298 894.0 +30 30.0 +302 302.0 +305 305.0 +306 306.0 +307 614.0 +308 308.0 +309 618.0 +310 310.0 +311 933.0 +315 315.0 +316 948.0 +317 634.0 +318 954.0 +321 642.0 +322 644.0 +323 323.0 +325 650.0 +327 981.0 +33 33.0 +331 662.0 +332 332.0 +333 666.0 +335 335.0 +336 336.0 +338 338.0 +339 339.0 +34 34.0 +341 341.0 +342 684.0 +344 688.0 +345 345.0 +348 1740.0 +35 105.0 +351 351.0 +353 706.0 +356 356.0 +360 360.0 +362 362.0 +364 364.0 +365 365.0 +366 366.0 +367 734.0 +368 368.0 +369 1107.0 +37 74.0 +373 373.0 +374 374.0 +375 375.0 +377 377.0 +378 378.0 +379 379.0 +382 764.0 +384 1152.0 +386 386.0 +389 389.0 +392 392.0 +393 393.0 +394 394.0 +395 790.0 +396 1188.0 +397 794.0 +399 798.0 +4 4.0 +400 400.0 +401 2005.0 +402 402.0 +403 1209.0 +404 808.0 +406 1624.0 +407 407.0 +409 1227.0 +41 41.0 +411 411.0 +413 826.0 +414 828.0 +417 1251.0 +418 418.0 +419 419.0 +42 84.0 +421 421.0 +424 848.0 +427 427.0 +429 858.0 +43 43.0 +430 1290.0 +431 1293.0 +432 432.0 +435 435.0 +436 436.0 +437 437.0 +438 1314.0 +439 878.0 +44 44.0 +443 443.0 +444 444.0 +446 446.0 +448 448.0 +449 449.0 +452 452.0 +453 453.0 +454 1362.0 +455 455.0 +457 457.0 +458 916.0 +459 918.0 +460 460.0 +462 924.0 +463 926.0 +466 1398.0 +467 467.0 +468 1872.0 +469 2345.0 +47 47.0 +470 470.0 +472 472.0 +475 475.0 +477 477.0 +478 956.0 +479 479.0 +480 1440.0 +481 481.0 +482 482.0 +483 483.0 +484 484.0 +485 485.0 +487 487.0 +489 1956.0 +490 490.0 +491 491.0 +492 984.0 +493 493.0 +494 494.0 +495 495.0 +496 496.0 +497 497.0 +498 1494.0 +5 15.0 +51 102.0 +53 53.0 +54 54.0 +57 57.0 +58 116.0 +64 64.0 +65 65.0 +66 66.0 +67 134.0 +69 69.0 +70 210.0 +72 144.0 +74 74.0 +76 152.0 +77 77.0 +78 78.0 +8 8.0 +80 80.0 +82 82.0 +83 166.0 +84 168.0 +85 85.0 +86 86.0 +87 87.0 +9 9.0 +90 270.0 +92 92.0 +95 190.0 +96 96.0 +97 194.0 +98 196.0 diff --git ql/src/test/results/clientpositive/spark/groupby6.q.out ql/src/test/results/clientpositive/spark/groupby6.q.out new file mode 100644 index 0000000000000000000000000000000000000000..c3caccd414d6bbfb99d18f630409def553ecde4b --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby6.q.out @@ -0,0 +1,113 @@ +PREHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: 
substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +4 +8 +6 +0 +2 +7 +5 +9 +3 +1 diff --git ql/src/test/results/clientpositive/spark/groupby6_map.q.out ql/src/test/results/clientpositive/spark/groupby6_map.q.out new file mode 100644 index 0000000000000000000000000000000000000000..77af6751c99e1bbd7641e846525ad0dd9d9dcb63 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby6_map.q.out @@ -0,0 +1,109 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + 
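The groupby6_map plan that follows collapses the same DISTINCT query into a single shuffle: a hash-mode Group By runs inside the mapper, the shuffle partitions on the group key itself, and one reducer stage finishes in mode mergepartial. A sketch of the map-aggregation settings assumed here:

set hive.execution.engine=spark;
set hive.map.aggr=true;            -- map-side hash aggregation
set hive.groupby.skewindata=false; -- single shuffle, no rand() stage

FROM src
INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1);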
+STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out new file mode 100644 index 0000000000000000000000000000000000000000..527bf1477ea595a0fed9fa11a11b0003c3d3bf38 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby6_map_skew.q.out @@ -0,0 +1,122 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends 
on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 31) + Reducer 3 <- Reducer 2 (GROUP, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out new file mode 100644 index 0000000000000000000000000000000000000000..2d8fb7410ce5fdef97c004ecd1a481a0411026fe --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby6_noskew.q.out @@ -0,0 +1,104 @@ +PREHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default 
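With map-side aggregation and skew handling enabled together, groupby6_map_skew above shows the full three-round pipeline: hash aggregation in the mapper, a rand()-partitioned shuffle into mode partials, then a key-partitioned shuffle into mode final. The settings assumed to drive that variant, as a sketch:

set hive.map.aggr=true;
set hive.groupby.skewindata=true;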
+PREHOOK: Output: default@dest1 +POSTHOOK: query: -- SORT_QUERY_RESULTS + +CREATE TABLE dest1(c1 STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 31) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: substr(value, 5, 1) (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator + +PREHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM src +INSERT OVERWRITE TABLE dest1 SELECT DISTINCT substr(src.value,5,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 diff --git ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out new file mode 100644 index 0000000000000000000000000000000000000000..9a5c8328b5e22642710d4461225dbe4b232f74f9 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby_grouping_id2.q.out @@ -0,0 +1,230 @@ +PREHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@T1 +POSTHOOK: query: CREATE TABLE T1(key INT, value INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE 
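The groupby_grouping_id2 output below checks GROUPING__ID under WITH ROLLUP. GROUP BY key, value WITH ROLLUP evaluates three grouping sets, and in this Hive version the GROUPING__ID values visible in the results map to them as 0 for the grand total, 1 when only key is grouped, and 3 when both key and value are grouped:

SELECT key, value, GROUPING__ID, count(*)
FROM T1
GROUP BY key, value WITH ROLLUP;
-- grouping sets observed in the output:
--   ()           -> GROUPING__ID 0
--   (key)        -> GROUPING__ID 1
--   (key, value) -> GROUPING__ID 3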
+POSTHOOK: Output: database:default +POSTHOOK: Output: default@T1 +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1 +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@t1 +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/groupby_groupingid.txt' INTO TABLE T1 +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@t1 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +NULL NULL 0 6 +1 1 3 1 +2 NULL 1 1 +2 2 3 1 +3 3 3 1 +4 NULL 1 1 +1 NULL 1 2 +1 NULL 3 1 +3 NULL 1 2 +3 NULL 3 1 +4 5 3 1 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 1 +3 6 +1 4 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +PREHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +1 NULL 3 1 +1 NULL 1 2 +NULL NULL 0 6 +4 5 3 1 +3 NULL 3 1 +3 NULL 1 2 +4 NULL 1 1 +2 NULL 1 1 +2 2 3 1 +1 1 3 1 +3 3 3 1 +PREHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT GROUPING__ID, count(*) +FROM +( +SELECT key, value, GROUPING__ID, count(*) from T1 GROUP BY key, value WITH ROLLUP +) t +GROUP BY GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 1 +3 6 +1 4 +PREHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID 
FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT t1.GROUPING__ID, t2.GROUPING__ID FROM (SELECT GROUPING__ID FROM T1 GROUP BY key,value WITH ROLLUP) t1 +JOIN +(SELECT GROUPING__ID FROM T1 GROUP BY key, value WITH ROLLUP) t2 +ON t1.GROUPING__ID = t2.GROUPING__ID +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +#### A masked pattern was here #### +0 0 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +1 1 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 +3 3 diff --git ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out new file mode 100644 index 0000000000000000000000000000000000000000..01ea4ea5ea0c680b71acd3c9dcaec137c40f18c2 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby_ppr_multi_distinct.q.out @@ -0,0 +1,346 @@ +PREHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dest1 +POSTHOOK: query: CREATE TABLE dest1(key STRING, c1 INT, c2 STRING, c3 INT, c4 INT) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dest1 +PREHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN EXTENDED +FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + +TOK_QUERY + TOK_FROM + TOK_TABREF + TOK_TABNAME + srcpart + src + TOK_INSERT + TOK_DESTINATION + TOK_TAB + TOK_TABNAME + dest1 + TOK_SELECT + TOK_SELEXPR + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_SELEXPR + TOK_FUNCTIONDI + count + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTION + concat + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + key + 1 + 1 + TOK_FUNCTION + sum + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTIONDI + sum + TOK_FUNCTION + substr + . + TOK_TABLE_OR_COL + src + value + 5 + TOK_SELEXPR + TOK_FUNCTIONDI + count + . + TOK_TABLE_OR_COL + src + value + TOK_WHERE + = + . + TOK_TABLE_OR_COL + src + ds + '2008-04-08' + TOK_GROUPBY + TOK_FUNCTION + substr + . 
+ TOK_TABLE_OR_COL + src + key + 1 + 1 + + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + Stage-2 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + GatherStats: false + Select Operator + expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE + tag: -1 + auto parallelism: false + Path -> Alias: +#### A masked pattern was here #### + Path -> Partition: +#### A masked pattern was here #### + Partition + base file name: hr=11 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 11 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart +#### A masked pattern was here #### + Partition + base file name: hr=12 + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + partition values: + ds 2008-04-08 + hr 12 + properties: + COLUMN_STATS_ACCURATE true + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + numFiles 1 + numRows 500 + partition_columns ds/hr + partition_columns.types string:string + rawDataSize 5312 + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + totalSize 5812 +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output 
format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.comments 'default','default' + columns.types string:string +#### A masked pattern was here #### + name default.srcpart + partition_columns ds/hr + partition_columns.types string:string + serialization.ddl struct srcpart { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.srcpart + name: default.srcpart + Truncated Path -> Alias: + /srcpart/ds=2008-04-08/hr=11 [src] + /srcpart/ds=2008-04-08/hr=12 [src] + Reducer 2 + Needs Tagging: false + Reduce Operator Tree: + Group By Operator + aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(DISTINCT KEY._col1:2._col0) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + GlobalTableId: 1 +#### A masked pattern was here #### + NumFilesPerFileSink: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2,c3,c4 + columns.comments + columns.types string:int:string:int:int +#### A masked pattern was here #### + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + TotalFiles: 1 + GatherStats: true + MultiFileSpray: false + + Stage: Stage-0 + Move Operator + tables: + replace: true +#### A masked pattern was here #### + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,c1,c2,c3,c4 + columns.comments + columns.types string:int:string:int:int +#### A masked pattern was here #### + name default.dest1 + serialization.ddl struct dest1 { string key, i32 c1, string c2, i32 c3, i32 c4} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe +#### A masked pattern was here #### + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dest1 + + Stage: Stage-2 + Stats-Aggr Operator +#### A masked pattern was here #### + +PREHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart +PREHOOK: Input: 
default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: default@dest1 +POSTHOOK: query: FROM srcpart src +INSERT OVERWRITE TABLE dest1 +SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(DISTINCT src.value) +WHERE src.ds = '2008-04-08' +GROUP BY substr(src.key,1,1) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@dest1 +POSTHOOK: Lineage: dest1.c1 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c2 EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), (srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.c3 EXPRESSION [(srcpart)src.null, ] +POSTHOOK: Lineage: dest1.c4 EXPRESSION [(srcpart)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: dest1.key EXPRESSION [(srcpart)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: SELECT dest1.* FROM dest1 +PREHOOK: type: QUERY +PREHOOK: Input: default@dest1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT dest1.* FROM dest1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@dest1 +#### A masked pattern was here #### +0 1 00.0 0 1 +2 69 251142.0 15780 69 +4 74 4105526.0 30965 74 +6 5 6796.0 331 5 +8 8 81524.0 595 8 +1 71 132828.0 10044 71 +3 62 364008.0 20119 62 +5 6 5794.0 278 6 +7 6 71470.0 447 6 +9 7 92094.0 577 7 diff --git ql/src/test/results/clientpositive/spark/groupby_resolution.q.out ql/src/test/results/clientpositive/spark/groupby_resolution.q.out new file mode 100644 index 0000000000000000000000000000000000000000..9d360998f3c46e630f5effef9e7fece341a0bca9 --- /dev/null +++ ql/src/test/results/clientpositive/spark/groupby_resolution.q.out @@ -0,0 +1,796 @@ +PREHOOK: query: explain select key, count(*) from src b group by b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(*) from src b group by b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch 
Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select b.key, count(*) from src b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.key, count(*) from src b group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select key, count(*) from src b group by b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(*) from src b group by b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select b.key, count(*) from src b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.key, count(*) from src b group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count() + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select key, count(*) from src b group by b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(*) from src b group by b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select b.key, count(*) from src b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.key, count(*) from src b group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select key, count(*) from src b group by b.key +PREHOOK: type: QUERY +POSTHOOK: query: explain select key, count(*) from src b group by b.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key 
expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain select b.key, count(*) from src b group by key +PREHOOK: type: QUERY +POSTHOOK: query: explain select b.key, count(*) from src b group by key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE 
+ table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- windowing after group by +select key, count(*), rank() over(order by count(*)) +from src b +where key < '12' +group by b.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- windowing after group by +select key, count(*), rank() over(order by count(*)) +from src b +where key < '12' +group by b.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 12 +10 1 1 +100 2 7 +103 2 7 +104 2 7 +105 1 1 +11 1 1 +111 1 1 +113 2 7 +114 1 1 +116 1 1 +118 2 7 +119 3 12 +PREHOOK: query: -- having after group by +select key, count(*) +from src b +group by b.key +having key < '12' +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- having after group by +select key, count(*) +from src b +group by b.key +having key < '12' +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 +10 1 +100 2 +103 2 +104 2 +105 1 +11 1 +111 1 +113 2 +114 1 +116 1 +118 2 +119 3 +PREHOOK: query: -- having and windowing +select key, count(*), rank() over(order by count(*)) +from src b +group by b.key +having key < '12' +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- having and windowing +select key, count(*), rank() over(order by count(*)) +from src b +group by b.key +having key < '12' +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 3 12 +10 1 1 +100 2 7 +103 2 7 +104 2 7 +105 1 1 +11 1 1 +111 1 1 +113 2 7 +114 1 1 +116 1 1 +118 2 7 +119 3 12 +PREHOOK: query: explain +select key, count(*), rank() over(order by count(*)) +from src b +group by b.key +having key < '12' +PREHOOK: type: QUERY +POSTHOOK: query: explain +select key, count(*), rank() over(order by count(*)) +from src b +group by b.key +having key < '12' +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) + Reducer 4 <- Reducer 3 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (key < '12') (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: 
bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 0 (type: int), _col1 (type: bigint) + sort order: ++ + Map-reduce partition columns: 0 (type: int) + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string), _col1 (type: bigint) + Reducer 4 + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + PTF Operator + Function definitions: + Input definition + input alias: ptf_0 + output shape: _col0: string, _col1: bigint + type: WINDOWING + Windowing table definition + input alias: ptf_1 + name: windowingtablefunction + order by: _col1 + partition by: 0 + raw input shape: + window functions: + window function definition + alias: rank_window_0 + arguments: _col1 + name: rank + window function: GenericUDAFRankEvaluator + window frame: PRECEDING(MAX)~FOLLOWING(MAX) + isPivotResult: true + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), rank_window_0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: -- order by +select key +from src t +where key < '12' +group by t.key +order by t.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: -- order by +select key +from src t +where key < '12' +group by t.key +order by t.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +10 +100 +103 +104 +105 +11 +111 +113 +114 +116 +118 +119 +PREHOOK: query: -- cluster by +EXPLAIN +SELECT x.key, x.value as key FROM SRC x CLUSTER BY key +PREHOOK: type: QUERY +POSTHOOK: query: -- cluster by +EXPLAIN +SELECT x.key, x.value as key FROM SRC x CLUSTER BY key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: 
COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Reducer 2 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/spark/nullgroup.q.out ql/src/test/results/clientpositive/spark/nullgroup.q.out new file mode 100644 index 0000000000000000000000000000000000000000..91b5723fb01aed755323da44ecbae54af10494de --- /dev/null +++ ql/src/test/results/clientpositive/spark/nullgroup.q.out @@ -0,0 +1,265 @@ +PREHOOK: query: explain +select count(1) from src x where x.key > 9999 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(1) from src x where x.key > 9999 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 9999.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: final + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from src x where x.key > 9999 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from src x where x.key > 9999 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +PREHOOK: query: explain 
+select count(1) from src x where x.key > 9999 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(1) from src x where x.key > 9999 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 9999.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from src x where x.key > 9999 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from src x where x.key > 9999 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(1) from src x where x.key > 9999 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(1) from src x where x.key > 9999 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 9999.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(1) + mode: partial1 + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: final + outputColumnNames: _col0 
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from src x where x.key > 9999 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from src x where x.key > 9999 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 +PREHOOK: query: explain +select count(1) from src x where x.key > 9999 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(1) from src x where x.key > 9999 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 9999.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(1) + mode: complete + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select count(1) from src x where x.key > 9999 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select count(1) from src x where x.key > 9999 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +0 diff --git ql/src/test/results/clientpositive/spark/nullgroup2.q.out ql/src/test/results/clientpositive/spark/nullgroup2.q.out new file mode 100644 index 0000000000000000000000000000000000000000..5749b9ed60c8fe8dbeb83711b22b3ac82e2ec9d5 --- /dev/null +++ ql/src/test/results/clientpositive/spark/nullgroup2.q.out @@ -0,0 +1,300 @@ +PREHOOK: query: explain +select x.key, count(1) from src x where x.key > 9999 group by x.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select x.key, count(1) from src x where x.key > 9999 group by x.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 
Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 9999.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: partials + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select x.key, count(1) from src x where x.key > 9999 group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select x.key, count(1) from src x where x.key > 9999 group by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain +select x.key, count(1) from src x where x.key > 9999 group by x.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select x.key, count(1) from src x where x.key > 9999 group by x.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 9999.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(1) + keys: _col0 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select x.key, count(1) from src x where x.key > 9999 group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select x.key, count(1) from src x where x.key > 9999 group by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain +select x.key, count(1) from src x where x.key > 9999 group by x.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select x.key, count(1) from src x where x.key > 9999 group by x.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 9999.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: rand() (type: double) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(1) + keys: KEY._col0 (type: string) + mode: partial1 + outputColumnNames: _col0, _col1 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: final + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + 
Processor Tree: + ListSink + +PREHOOK: query: select x.key, count(1) from src x where x.key > 9999 group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select x.key, count(1) from src x where x.key > 9999 group by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +PREHOOK: query: explain +select x.key, count(1) from src x where x.key > 9999 group by x.key +PREHOOK: type: QUERY +POSTHOOK: query: explain +select x.key, count(1) from src x where x.key > 9999 group by x.key +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP, 2) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) > 9999.0) (type: boolean) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: count(1) + keys: KEY._col0 (type: string) + mode: complete + outputColumnNames: _col0, _col1 + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select x.key, count(1) from src x where x.key > 9999 group by x.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select x.key, count(1) from src x where x.key > 9999 group by x.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/spark/nullgroup4.q.out ql/src/test/results/clientpositive/spark/nullgroup4.q.out new file mode 100644 index 0000000000000000000000000000000000000000..78844b0d85c74268bac4fa1398de8d05ea4f2175 --- /dev/null +++ ql/src/test/results/clientpositive/spark/nullgroup4.q.out @@ -0,0 +1,292 @@ +PREHOOK: query: explain +select count(1), count(distinct x.value) from src x where x.key = 9999 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select count(1), count(distinct x.value) from src x where x.key = 9999 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Spark + Edges: + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2) + Reducer 3 <- Reducer 2 (GROUP, 1) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (UDFToDouble(key) = 9999.0) 
(type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col1
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1), count(DISTINCT _col1)
+                        keys: _col1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: string)
+                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: bigint)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0)
+                mode: partials
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: bigint)
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1)
+                mode: final
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(distinct x.value) from src x where x.key = 9999
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(distinct x.value) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0
+PREHOOK: query: explain
+select count(1), count(distinct x.value) from src x where x.key = 9999
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1), count(distinct x.value) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (UDFToDouble(key) = 9999.0) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col1
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1), count(DISTINCT _col1)
+                        keys: _col1 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string)
+                          sort order: +
+                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col1 (type: bigint)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(distinct x.value) from src x where x.key = 9999
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(distinct x.value) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0
+PREHOOK: query: explain
+select count(1), count(distinct x.value) from src x where x.key = 9999
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1), count(distinct x.value) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 2)
+        Reducer 3 <- Reducer 2 (GROUP, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (UDFToDouble(key) = 9999.0) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col1
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col1 (type: string)
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(1), count(DISTINCT KEY._col0:0._col0)
+                mode: partial1
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order:
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: bigint), _col1 (type: bigint)
+        Reducer 3
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(VALUE._col1)
+                mode: final
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(distinct x.value) from src x where x.key = 9999
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(distinct x.value) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0
+PREHOOK: query: explain
+select count(1), count(distinct x.value) from src x where x.key = 9999
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1), count(distinct x.value) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (UDFToDouble(key) = 9999.0) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string)
+                      outputColumnNames: _col1
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string)
+                        sort order: +
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(1), count(DISTINCT KEY._col0:0._col0)
+                mode: complete
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(distinct x.value) from src x where x.key = 9999
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(distinct x.value) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0
diff --git ql/src/test/results/clientpositive/spark/nullgroup4_multi_distinct.q.out ql/src/test/results/clientpositive/spark/nullgroup4_multi_distinct.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..2e4e07d1b9aeb99a402eebe1bc092a3c01dcfaed
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/nullgroup4_multi_distinct.q.out
@@ -0,0 +1,133 @@
+PREHOOK: query: explain
+select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (UDFToDouble(key) = 9999.0) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string), substr(value, 5) (type: string)
+                      outputColumnNames: _col1, _col2
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Group By Operator
+                        aggregations: count(1), count(DISTINCT _col1), count(DISTINCT _col2)
+                        keys: _col1 (type: string), _col2 (type: string)
+                        mode: hash
+                        outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: string), _col1 (type: string)
+                          sort order: ++
+                          Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                          value expressions: _col2 (type: bigint)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0 0
+PREHOOK: query: explain
+select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Spark
+      Edges:
+        Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+                  Filter Operator
+                    predicate: (UDFToDouble(key) = 9999.0) (type: boolean)
+                    Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                    Select Operator
+                      expressions: value (type: string), substr(value, 5) (type: string)
+                      outputColumnNames: _col1, _col2
+                      Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        key expressions: _col1 (type: string), _col2 (type: string)
+                        sort order: ++
+                        Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(1), count(DISTINCT KEY._col0:0._col0), count(DISTINCT KEY._col0:1._col0)
+                mode: complete
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select count(1), count(distinct x.value), count(distinct substr(x.value, 5)) from src x where x.key = 9999
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0 0
diff --git ql/src/test/results/clientpositive/spark/temp_table_gb1.q.out ql/src/test/results/clientpositive/spark/temp_table_gb1.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..473245da6fc46ebe6cac6185e8e01874045a2689
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/temp_table_gb1.q.out
@@ -0,0 +1,67 @@
+PREHOOK: query: -- Taken from groupby2.q
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: -- Taken from groupby2.q
+CREATE TABLE dest_g2(key STRING, c1 INT, c2 STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: CREATE TEMPORARY TABLE src_temp AS SELECT * FROM src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@src_temp
+POSTHOOK: query: CREATE TEMPORARY TABLE src_temp AS SELECT * FROM src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@src_temp
+PREHOOK: query: FROM src_temp
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src_temp
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: FROM src_temp
+INSERT OVERWRITE TABLE dest_g2 SELECT substr(src_temp.key,1,1), count(DISTINCT substr(src_temp.value,5)), concat(substr(src_temp.key,1,1),sum(substr(src_temp.value,5))) GROUP BY substr(src_temp.key,1,1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src_temp
+POSTHOOK: Output: default@dest_g2
+POSTHOOK: Lineage: dest_g2.c1 EXPRESSION [(src_temp)src_temp.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest_g2.c2 EXPRESSION [(src_temp)src_temp.FieldSchema(name:key, type:string, comment:null), (src_temp)src_temp.FieldSchema(name:value, type:string, comment:null), ]
+POSTHOOK: Lineage: dest_g2.key EXPRESSION [(src_temp)src_temp.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: SELECT dest_g2.* FROM dest_g2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT dest_g2.* FROM dest_g2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_g2
+#### A masked pattern was here ####
+4 74 452763.0
+8 8 8762.0
+6 5 6398.0
+0 1 00.0
+2 69 225571.0
+7 6 7735.0
+5 6 5397.0
+9 7 91047.0
+3 62 332004.0
+1 71 116414.0
+PREHOOK: query: DROP TABLE dest_g2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@dest_g2
+PREHOOK: Output: default@dest_g2
+POSTHOOK: query: DROP TABLE dest_g2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@dest_g2
+POSTHOOK: Output: default@dest_g2
+PREHOOK: query: DROP TABLE src_temp
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@src_temp
+PREHOOK: Output: default@src_temp
+POSTHOOK: query: DROP TABLE src_temp
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@src_temp
+POSTHOOK: Output: default@src_temp
diff --git ql/src/test/results/clientpositive/spark/udaf_collect_set.q.out ql/src/test/results/clientpositive/spark/udaf_collect_set.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..42f2d9d35f9e9eeb76405cb1d56a4db1fd3f5423
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/udaf_collect_set.q.out
@@ -0,0 +1,212 @@
+PREHOOK: query: DESCRIBE FUNCTION collect_set
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION collect_set
+POSTHOOK: type: DESCFUNCTION
+collect_set(x) - Returns a set of objects with duplicate elements eliminated
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_set
+POSTHOOK: type: DESCFUNCTION
+collect_set(x) - Returns a set of objects with duplicate elements eliminated
+PREHOOK: query: DESCRIBE FUNCTION collect_list
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION collect_list
+POSTHOOK: type: DESCFUNCTION
+collect_list(x) - Returns a list of objects with duplicates
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED collect_list
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED collect_list
+POSTHOOK: type: DESCFUNCTION
+collect_list(x) - Returns a list of objects with duplicates
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 ["val_0"]
+10 ["val_10"]
+100 ["val_100"]
+103 ["val_103"]
+104 ["val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118"]
+119 ["val_119"]
+12 ["val_12"]
+120 ["val_120"]
+125 ["val_125"]
+126 ["val_126"]
+128 ["val_128"]
+129 ["val_129"]
+131 ["val_131"]
+PREHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER by key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER by key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 ["val_0","val_0","val_0"]
+10 ["val_10"]
+100 ["val_100","val_100"]
+103 ["val_103","val_103"]
+104 ["val_104","val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113","val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118","val_118"]
+119 ["val_119","val_119","val_119"]
+12 ["val_12","val_12"]
+120 ["val_120","val_120"]
+125 ["val_125","val_125"]
+126 ["val_126"]
+128 ["val_128","val_128","val_128"]
+129 ["val_129","val_129"]
+131 ["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 ["val_0"]
+10 ["val_10"]
+100 ["val_100"]
+103 ["val_103"]
+104 ["val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118"]
+119 ["val_119"]
+12 ["val_12"]
+120 ["val_120"]
+125 ["val_125"]
+126 ["val_126"]
+128 ["val_128"]
+129 ["val_129"]
+131 ["val_131"]
+PREHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_list(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 ["val_0","val_0","val_0"]
+10 ["val_10"]
+100 ["val_100","val_100"]
+103 ["val_103","val_103"]
+104 ["val_104","val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113","val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118","val_118"]
+119 ["val_119","val_119","val_119"]
+12 ["val_12","val_12"]
+120 ["val_120","val_120"]
+125 ["val_125","val_125"]
+126 ["val_126"]
+128 ["val_128","val_128","val_128"]
+129 ["val_129","val_129"]
+131 ["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 ["val_0"]
+10 ["val_10"]
+100 ["val_100"]
+103 ["val_103"]
+104 ["val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118"]
+119 ["val_119"]
+12 ["val_12"]
+120 ["val_120"]
+125 ["val_125"]
+126 ["val_126"]
+128 ["val_128"]
+129 ["val_129"]
+131 ["val_131"]
+PREHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT key, collect_set(value)
+FROM src
+GROUP BY key ORDER BY key limit 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 ["val_0"]
+10 ["val_10"]
+100 ["val_100"]
+103 ["val_103"]
+104 ["val_104"]
+105 ["val_105"]
+11 ["val_11"]
+111 ["val_111"]
+113 ["val_113"]
+114 ["val_114"]
+116 ["val_116"]
+118 ["val_118"]
+119 ["val_119"]
+12 ["val_12"]
+120 ["val_120"]
+125 ["val_125"]
+126 ["val_126"]
+128 ["val_128"]
+129 ["val_129"]
+131 ["val_131"]
diff --git ql/src/test/results/clientpositive/spark/udf_max.q.out ql/src/test/results/clientpositive/spark/udf_max.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..8535752165e751a2217d05684e9800760aa8b31b
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/udf_max.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: DESCRIBE FUNCTION max
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION max
+POSTHOOK: type: DESCFUNCTION
+max(expr) - Returns the maximum value of expr
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED max
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED max
+POSTHOOK: type: DESCFUNCTION
+max(expr) - Returns the maximum value of expr
+PREHOOK: query: SELECT max(struct(CAST(key as INT), value)),
+       max(struct(key, value))
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(struct(CAST(key as INT), value)),
+       max(struct(key, value))
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"col1":498,"col2":"val_498"} {"col1":"98","col2":"val_98"}
+PREHOOK: query: SELECT max(struct(CAST(key as INT), value)),
+       max(struct(key, value))
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(struct(CAST(key as INT), value)),
+       max(struct(key, value))
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"col1":498,"col2":"val_498"} {"col1":"98","col2":"val_98"}
+PREHOOK: query: SELECT max(struct(CAST(key as INT), value)),
+       max(struct(key, value))
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(struct(CAST(key as INT), value)),
+       max(struct(key, value))
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"col1":498,"col2":"val_498"} {"col1":"98","col2":"val_98"}
+PREHOOK: query: SELECT max(struct(CAST(key as INT), value)),
+       max(struct(key, value))
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT max(struct(CAST(key as INT), value)),
+       max(struct(key, value))
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"col1":498,"col2":"val_498"} {"col1":"98","col2":"val_98"}
diff --git ql/src/test/results/clientpositive/spark/udf_min.q.out ql/src/test/results/clientpositive/spark/udf_min.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..ca8158e455e92f87eb01600fb0a08cd143f99c26
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/udf_min.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: DESCRIBE FUNCTION min
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION min
+POSTHOOK: type: DESCFUNCTION
+min(expr) - Returns the minimum value of expr
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED min
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED min
+POSTHOOK: type: DESCFUNCTION
+min(expr) - Returns the minimum value of expr
+PREHOOK: query: SELECT min(struct(CAST(key as INT), value)),
+       min(struct(key, value))
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT min(struct(CAST(key as INT), value)),
+       min(struct(key, value))
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"col1":0,"col2":"val_0"} {"col1":"0","col2":"val_0"}
+PREHOOK: query: SELECT min(struct(CAST(key as INT), value)),
+       min(struct(key, value))
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT min(struct(CAST(key as INT), value)),
+       min(struct(key, value))
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"col1":0,"col2":"val_0"} {"col1":"0","col2":"val_0"}
+PREHOOK: query: SELECT min(struct(CAST(key as INT), value)),
+       min(struct(key, value))
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT min(struct(CAST(key as INT), value)),
+       min(struct(key, value))
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"col1":0,"col2":"val_0"} {"col1":"0","col2":"val_0"}
+PREHOOK: query: SELECT min(struct(CAST(key as INT), value)),
+       min(struct(key, value))
+FROM src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT min(struct(CAST(key as INT), value)),
+       min(struct(key, value))
+FROM src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+{"col1":0,"col2":"val_0"} {"col1":"0","col2":"val_0"}
diff --git ql/src/test/results/clientpositive/spark/udf_percentile.q.out ql/src/test/results/clientpositive/spark/udf_percentile.q.out
new file mode 100644
index 0000000000000000000000000000000000000000..bb929297b6f2fc14ad593bcd368d5dbdba99c91c
--- /dev/null
+++ ql/src/test/results/clientpositive/spark/udf_percentile.q.out
@@ -0,0 +1,446 @@
+PREHOOK: query: DESCRIBE FUNCTION percentile
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION percentile
+POSTHOOK: type: DESCFUNCTION
+percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED percentile
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED percentile
+POSTHOOK: type: DESCFUNCTION
+percentile(expr, pc) - Returns the percentile(s) of expr at pc (range: [0,1]).pc can be a double or double array
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0]
+4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0]
+16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0]
+22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0]
+28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0]
+48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0]
+46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0]
+30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0]
+14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0]
+32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0]
+36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0]
+24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0]
+0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0]
+42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0]
+40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0]
+6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0]
+8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0]
+12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0]
+38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0]
+20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0]
+44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0]
+18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0]
+26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0]
+10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0]
+2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0]
+19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0]
+41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0]
+39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0]
+15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0]
+21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0]
+47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0]
+13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0]
+25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0]
+29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0]
+11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0]
+35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0]
+27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0]
+37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0]
+33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0]
+23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0]
+45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0]
+1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0]
+17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0]
+3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0]
+7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0]
+9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0]
+49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0]
+31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0]
+43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0]
+5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0]
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0]
+4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0]
+16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0]
+22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0]
+28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0]
+46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0]
+48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0]
+30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0]
+14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0]
+32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0]
+36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0]
+0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0]
+24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0]
+42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0]
+40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0]
+6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0]
+8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0]
+12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0]
+38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0]
+20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0]
+44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0]
+18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0]
+26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0]
+10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0]
+2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0]
+13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0]
+19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0]
+41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0]
+39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0]
+15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0]
+21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0]
+47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0]
+25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0]
+29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0]
+11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0]
+35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0]
+27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0]
+37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0]
+33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0]
+23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0]
+45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0]
+1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0]
+17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0]
+3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0]
+7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0]
+9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0]
+49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0]
+31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0]
+43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0]
+5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0]
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0]
+4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0]
+16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0]
+22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0]
+28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0]
+46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0]
+48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0]
+30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0]
+14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0]
+32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0]
+36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0]
+0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0]
+24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0]
+42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0]
+40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0]
+6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0]
+8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0]
+12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0]
+38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0]
+18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0]
+44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0]
+20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0]
+26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0]
+10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0]
+2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0]
+13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0]
+19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0]
+41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0]
+39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0]
+15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0]
+21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0]
+47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0]
+25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0]
+29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0]
+11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0]
+35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0]
+27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0]
+33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0]
+37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0]
+23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0]
+45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0]
+1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0]
+17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0]
+3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0]
+7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0]
+9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0]
+49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0]
+31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0]
+43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0]
+5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0]
+PREHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT CAST(key AS INT) DIV 10,
+       percentile(CAST(substr(value, 5) AS INT), 0.0),
+       percentile(CAST(substr(value, 5) AS INT), 0.5),
+       percentile(CAST(substr(value, 5) AS INT), 1.0),
+       percentile(CAST(substr(value, 5) AS INT), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+34 341.0 345.0 348.0 [341.0,345.0,348.0,348.0]
+4 41.0 42.5 47.0 [41.0,42.5,46.849999999999994,47.0]
+16 160.0 166.5 169.0 [160.0,166.5,169.0,169.0]
+22 221.0 224.0 229.0 [221.0,224.0,229.0,229.0]
+28 280.0 283.5 289.0 [280.0,283.5,288.87,289.0]
+48 480.0 484.0 489.0 [480.0,484.0,489.0,489.0]
+46 460.0 467.5 469.0 [460.0,467.5,469.0,469.0]
+30 302.0 307.0 309.0 [302.0,307.0,309.0,309.0]
+14 143.0 146.0 149.0 [143.0,146.0,149.0,149.0]
+32 321.0 324.0 327.0 [321.0,324.0,327.0,327.0]
+36 360.0 367.0 369.0 [360.0,367.0,369.0,369.0]
+0 0.0 4.5 9.0 [0.0,4.5,8.91,9.0]
+24 241.0 244.0 249.0 [241.0,244.0,248.94,249.0]
+42 421.0 425.5 429.0 [421.0,425.5,429.0,429.0]
+40 400.0 403.5 409.0 [400.0,403.5,409.0,409.0]
+6 64.0 66.5 69.0 [64.0,66.5,68.9,69.0]
+8 80.0 84.0 87.0 [80.0,84.0,86.92,87.0]
+18 180.0 186.5 189.0 [180.0,186.5,188.86,189.0]
+38 382.0 384.0 389.0 [382.0,384.0,388.82,389.0]
+20 200.0 205.0 209.0 [200.0,205.0,209.0,209.0]
+12 120.0 127.0 129.0 [120.0,127.0,129.0,129.0]
+44 443.0 446.0 449.0 [443.0,446.0,448.96,449.0]
+26 260.0 264.0 266.0 [260.0,264.0,265.95,266.0]
+10 100.0 103.0 105.0 [100.0,103.0,104.94,105.0]
+2 20.0 26.0 28.0 [20.0,26.0,27.939999999999998,28.0]
+13 131.0 137.0 138.0 [131.0,137.0,138.0,138.0]
+41 411.0 415.5 419.0 [411.0,415.5,418.91,419.0]
+19 190.0 194.5 199.0 [190.0,194.5,199.0,199.0]
+15 150.0 154.0 158.0 [150.0,154.0,157.92999999999998,158.0]
+21 213.0 216.5 219.0 [213.0,216.5,219.0,219.0]
+47 470.0 477.0 479.0 [470.0,477.0,478.94,479.0]
+39 392.0 396.0 399.0 [392.0,396.0,399.0,399.0]
+25 252.0 256.0 258.0 [252.0,256.0,257.94,258.0]
+29 291.0 297.0 298.0 [291.0,297.0,298.0,298.0]
+35 351.0 353.0 356.0 [351.0,353.0,355.91,356.0]
+11 111.0 117.0 119.0 [111.0,117.0,119.0,119.0]
+27 272.0 275.0 278.0 [272.0,275.0,278.0,278.0]
+33 331.0 333.0 339.0 [331.0,333.0,338.92,339.0]
+37 373.0 376.0 379.0 [373.0,376.0,378.95,379.0]
+23 230.0 234.0 239.0 [230.0,234.0,239.0,239.0]
+45 452.0 455.0 459.0 [452.0,455.0,459.0,459.0]
+1 10.0 15.0 19.0 [10.0,15.0,18.91,19.0]
+17 170.0 175.0 179.0 [170.0,175.0,179.0,179.0]
+7 70.0 73.0 78.0 [70.0,73.0,77.91000000000001,78.0]
+3 30.0 35.0 37.0 [30.0,35.0,37.0,37.0]
+9 90.0 95.0 98.0 [90.0,95.0,98.0,98.0]
+49 490.0 494.5 498.0 [490.0,494.5,498.0,498.0]
+31 310.0 316.0 318.0 [310.0,316.0,318.0,318.0]
+43 430.0 435.0 439.0 [430.0,435.0,439.0,439.0]
+5 51.0 54.0 58.0 [51.0,54.0,58.0,58.0]
+PREHOOK: query: -- test null handling
+SELECT CAST(key AS INT) DIV 10,
+       percentile(NULL, 0.0),
+       percentile(NULL, array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- test null handling
+SELECT CAST(key AS INT) DIV 10,
+       percentile(NULL, 0.0),
+       percentile(NULL, array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+34 NULL NULL
+4 NULL NULL
+16 NULL NULL
+22 NULL NULL
+28 NULL NULL
+46 NULL NULL
+48 NULL NULL
+30 NULL NULL
+14 NULL NULL
+32 NULL NULL
+36 NULL NULL
+0 NULL NULL
+24 NULL NULL
+42 NULL NULL
+40 NULL NULL
+6 NULL NULL
+8 NULL NULL
+12 NULL NULL
+38 NULL NULL
+20 NULL NULL
+44 NULL NULL
+18 NULL NULL
+26 NULL NULL
+10 NULL NULL
+2 NULL NULL
+13 NULL NULL
+19 NULL NULL
+41 NULL NULL
+39 NULL NULL
+15 NULL NULL
+21 NULL NULL
+47 NULL NULL
+25 NULL NULL
+29 NULL NULL
+11 NULL NULL
+35 NULL NULL
+27 NULL NULL
+37 NULL NULL
+33 NULL NULL
+23 NULL NULL
+45 NULL NULL
+1 NULL NULL
+17 NULL NULL
+3 NULL NULL
+7 NULL NULL
+9 NULL NULL
+49 NULL NULL
+31 NULL NULL
+43 NULL NULL
+5 NULL NULL
+PREHOOK: query: -- test empty array handling
+SELECT CAST(key AS INT) DIV 10,
+       percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5),
+       percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- test empty array handling
+SELECT CAST(key AS INT) DIV 10,
+       percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), 0.5),
+       percentile(IF(CAST(key AS INT) DIV 10 < 5, 1, NULL), array(0.0, 0.5, 0.99, 1.0))
+FROM src
+GROUP BY CAST(key AS INT) DIV 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+34 NULL NULL
+4 1.0 [1.0,1.0,1.0,1.0]
+16 NULL NULL
+22 NULL NULL
+28 NULL NULL
+46 NULL NULL
+48 NULL NULL
+30 NULL NULL
+14 NULL NULL
+32 NULL NULL
+36 NULL NULL
+0 1.0 [1.0,1.0,1.0,1.0]
+24 NULL NULL
+42 NULL NULL
+40 NULL NULL
+6 NULL NULL
+8 NULL NULL
+12 NULL NULL
+38 NULL NULL
+20 NULL NULL
+44 NULL NULL
+18 NULL NULL
+26 NULL NULL
+10 NULL NULL
+2 1.0 [1.0,1.0,1.0,1.0]
+13 NULL NULL
+19 NULL NULL
+41 NULL NULL
+39 NULL NULL
+15 NULL NULL
+21 NULL NULL
+47 NULL NULL
+25 NULL NULL
+29 NULL NULL
+11 NULL NULL
+35 NULL NULL
+27 NULL NULL
+37 NULL NULL
+33 NULL NULL
+23 NULL NULL
+45 NULL NULL
+1 1.0 [1.0,1.0,1.0,1.0]
+17 NULL NULL
+3 1.0 [1.0,1.0,1.0,1.0]
+7 NULL NULL
+9 NULL NULL
+49 NULL NULL
+31 NULL NULL
+43 NULL NULL
+5 NULL NULL
+PREHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select percentile(cast(key as bigint), 0.5) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: -- test where percentile list is empty
+select percentile(cast(key as bigint), array()) from src where false
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: -- test where percentile list is empty
+select percentile(cast(key as bigint), array()) from src where false
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+NULL