diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out index d265240..5199f41 100644 --- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out +++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out @@ -368,11 +368,11 @@ val_129 130.0 val_131 132.0 PREHOOK: query: -- distincts explain -select distinct(cdouble) from alltypesorc limit 20 +select distinct(cdouble) as dis from alltypesorc order by dis limit 20 PREHOOK: type: QUERY POSTHOOK: query: -- distincts explain -select distinct(cdouble) from alltypesorc limit 20 +select distinct(cdouble) as dis from alltypesorc order by dis limit 20 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -382,7 +382,7 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 2 <- Map 1 (GROUP, 3) + Reducer 2 <- Map 1 (GROUP PARTITION-LEVEL SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -433,11 +433,11 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select distinct(cdouble) from alltypesorc limit 20 +PREHOOK: query: select distinct(cdouble) as dis from alltypesorc order by dis limit 20 PREHOOK: type: QUERY PREHOOK: Input: default@alltypesorc #### A masked pattern was here #### -POSTHOOK: query: select distinct(cdouble) from alltypesorc limit 20 +POSTHOOK: query: select distinct(cdouble) as dis from alltypesorc order by dis limit 20 POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc #### A masked pattern was here #### @@ -1203,11 +1203,11 @@ POSTHOOK: Input: default@src 187 val_187 val_187 val_187 val_187 val_187 val_187 val_187 val_187 PREHOOK: query: -- flush for group-by explain -select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) limit 100 +select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 100 PREHOOK: type: QUERY POSTHOOK: query: -- flush for group-by explain -select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) limit 100 +select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 100 POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1218,6 +1218,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -1234,7 +1235,6 @@ STAGE PLANS: sort order: + Map-reduce partition columns: concat(key, value, value, value, value, value, value, value, value, value) (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE - TopN Hash Memory Usage: 2.0E-5 value expressions: key (type: string) Reducer 2 Reduce Operator Tree: @@ -1248,16 +1248,27 @@ STAGE PLANS: expressions: _col1 (type: double) outputColumnNames: _col0 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 100 + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + TopN Hash Memory Usage: 2.0E-5 + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 100 + Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 100 Data size: 1000 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -1265,111 +1276,111 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) limit 100 +PREHOOK: query: select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 100 PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) limit 100 +POSTHOOK: query: select sum(key) as sum from src group by concat(key,value,value,value,value,value,value,value,value,value) order by sum limit 100 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### +0.0 10.0 +102.0 105.0 -1107.0 +105.0 +11.0 +111.0 114.0 -1227.0 -1251.0 +116.0 +116.0 126.0 -1314.0 +131.0 133.0 +134.0 136.0 143.0 +144.0 145.0 -1494.0 15.0 +150.0 +152.0 153.0 155.0 +156.0 +157.0 +158.0 +160.0 +162.0 163.0 166.0 +166.0 +168.0 +168.0 17.0 +170.0 +177.0 +178.0 +180.0 +181.0 183.0 -1872.0 +186.0 +189.0 +19.0 +190.0 +190.0 +192.0 194.0 194.0 -1956.0 +196.0 +196.0 +2.0 +20.0 +200.0 201.0 202.0 +206.0 208.0 210.0 214.0 +218.0 222.0 226.0 -2345.0 -241.0 -250.0 -252.0 -262.0 -263.0 -268.0 -283.0 -291.0 -292.0 +226.0 +228.0 +24.0 +27.0 +28.0 +30.0 30.0 -310.0 -328.0 33.0 -330.0 -339.0 -341.0 -348.0 -350.0 -351.0 -360.0 -379.0 -389.0 -390.0 +34.0 +36.0 4.0 -406.0 41.0 -418.0 -419.0 -426.0 -437.0 -442.0 -446.0 -448.0 -448.0 -449.0 -457.0 -466.0 -467.0 -477.0 -479.0 -484.0 -487.0 -496.0 -497.0 +43.0 +44.0 +47.0 +48.0 +52.0 53.0 -544.0 -562.0 -564.0 +54.0 57.0 -579.0 -618.0 -642.0 +64.0 65.0 -662.0 +66.0 69.0 74.0 +74.0 77.0 -798.0 -819.0 +78.0 +8.0 +80.0 82.0 84.0 85.0 -858.0 -878.0 -916.0 -918.0 -933.0 -956.0 +86.0 +87.0 +9.0 +92.0 +96.0 diff --git a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out index 7968853..18f5c1e 100644 --- a/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out +++ b/ql/src/test/results/clientpositive/spark/optimize_nullscan.q.out @@ -1,4 +1,4 @@ -PREHOOK: query: -- SORT_BEFORE_DIFF +PREHOOK: query: -- SORT_QUERY_RESULTS -- Disable CBO here, because it messes with the cases specifically crafted for the optimizer. -- Instead, we could improve the optimizer to recognize more cases, e.g. filter before join. @@ -6,7 +6,7 @@ PREHOOK: query: -- SORT_BEFORE_DIFF explain extended select key from src where false PREHOOK: type: QUERY -POSTHOOK: query: -- SORT_BEFORE_DIFF +POSTHOOK: query: -- SORT_QUERY_RESULTS -- Disable CBO here, because it messes with the cases specifically crafted for the optimizer. -- Instead, we could improve the optimizer to recognize more cases, e.g. filter before join. diff --git a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out index eaca856..6298cb3 100644 --- a/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out +++ b/ql/src/test/results/clientpositive/spark/ppd_gby_join.q.out @@ -1,4 +1,6 @@ -PREHOOK: query: EXPLAIN +PREHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN SELECT src1.c1, count(1) FROM (SELECT src.key AS c1, src.value AS c2 from src where src.key > '1' ) src1 @@ -8,7 +10,9 @@ ON src1.c1 = src2.c3 AND src1.c1 < '400' WHERE src1.c1 > '20' AND (src1.c2 < 'val_50' OR src1.c1 > '2') AND (src2.c3 > '50' OR src1.c1 < '50') AND (src2.c3 <> '4') GROUP BY src1.c1 PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: -- SORT_QUERY_RESULTS + +EXPLAIN SELECT src1.c1, count(1) FROM (SELECT src.key AS c1, src.value AS c2 from src where src.key > '1' ) src1 @@ -148,135 +152,135 @@ GROUP BY src1.c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -309 4 -378 1 -333 4 -302 1 -306 1 -392 1 -242 4 -238 4 -384 9 -315 1 -30 1 -395 4 -229 4 -292 1 -325 4 +200 4 +201 1 +202 1 +203 4 +205 4 +207 4 +208 9 +209 4 +213 4 +214 1 216 4 +217 4 +218 1 +219 4 +221 4 +222 1 +223 4 +224 4 +226 1 +228 1 +229 4 +230 25 233 4 +235 1 237 4 -217 4 -362 1 +238 4 239 4 -200 4 -201 1 -285 1 -399 4 -222 1 -265 4 -336 1 -260 1 -256 4 -360 1 +24 4 +241 1 +242 4 244 1 +247 1 +248 1 +249 1 252 1 -281 4 -339 1 -283 1 -368 1 255 4 -266 1 -262 1 -365 1 -317 4 -396 9 -307 4 -230 25 -286 1 -24 4 -335 1 -373 1 -316 9 -353 4 -367 4 -382 4 +256 4 257 1 -394 1 -366 1 -248 1 -288 4 -235 1 +258 1 +26 4 +260 1 +262 1 +263 1 +265 4 +266 1 +27 1 +272 4 273 9 -221 4 -342 4 -226 1 -397 4 -33 1 -224 4 -389 1 -296 1 -34 1 -386 1 +274 1 +275 1 +277 16 +278 4 28 1 -272 4 -214 1 -348 25 -369 9 -202 1 -37 4 +280 4 +281 4 +282 4 +283 1 +284 1 +285 1 +286 1 +287 1 +288 4 289 1 -27 1 -223 4 +291 1 +292 1 +296 1 +298 9 +30 1 +302 1 +305 1 +306 1 +307 4 +308 1 +309 4 310 1 +311 9 +315 1 +316 9 +317 4 318 9 -228 1 -379 1 -278 4 -280 4 -287 1 -393 1 +321 4 +322 4 +323 1 +325 4 +327 9 +33 1 +331 4 +332 1 +333 4 +335 1 +336 1 +338 1 +339 1 +34 1 341 1 -305 1 +342 4 344 4 -274 1 -209 4 -364 1 -219 4 +345 1 +348 25 35 9 -323 1 -203 4 -213 4 -282 4 -218 1 -247 1 -207 4 -338 1 -205 4 -311 9 -331 4 351 1 -284 1 -208 9 -26 4 -275 1 -322 4 -291 1 -375 1 -308 1 -345 1 -298 9 -377 1 -277 16 +353 4 356 1 -327 9 -332 1 -321 4 -263 1 -249 1 -241 1 -258 1 +360 1 +362 1 +364 1 +365 1 +366 1 +367 4 +368 1 +369 9 +37 4 +373 1 374 1 +375 1 +377 1 +378 1 +379 1 +382 4 +384 9 +386 1 +389 1 +392 1 +393 1 +394 1 +395 4 +396 9 +397 4 +399 4 PREHOOK: query: EXPLAIN SELECT src1.c1, count(1) FROM @@ -421,132 +425,132 @@ GROUP BY src1.c1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -255 4 -213 4 -309 4 -282 4 -333 4 -378 1 -306 1 -396 9 -30 1 -384 9 -315 1 +200 4 +201 1 +202 1 +203 4 +205 4 207 4 -348 25 -24 4 -369 9 -351 1 -27 1 -318 9 +208 9 +209 4 +213 4 +214 1 216 4 +217 4 +218 1 +219 4 +221 4 +222 1 +223 4 +224 4 +226 1 228 1 -291 1 +229 4 +230 25 +233 4 +235 1 237 4 -252 1 -375 1 -345 1 -366 1 -201 1 -285 1 -399 4 -222 1 -393 1 -336 1 -288 4 -360 1 -321 4 -327 9 -273 9 -219 4 +238 4 +239 4 +24 4 +241 1 +242 4 +244 1 +247 1 +248 1 249 1 -342 4 -339 1 -33 1 +252 1 +255 4 +256 4 +257 1 258 1 -28 1 +26 4 +260 1 262 1 -34 1 -307 4 -238 4 -247 1 -286 1 -205 4 -214 1 -229 4 -331 4 -292 1 -202 1 -373 1 -37 4 -289 1 -208 9 -316 9 -310 1 -223 4 -367 4 -322 4 -382 4 -394 1 -217 4 -325 4 -298 9 -379 1 -277 16 -280 4 +263 1 265 4 -256 4 -244 1 -235 1 +266 1 +27 1 +272 4 +273 9 274 1 -226 1 -364 1 -241 1 +275 1 +277 16 +278 4 +28 1 +280 4 +281 4 +282 4 283 1 -397 4 -389 1 +284 1 +285 1 +286 1 +287 1 +288 4 +289 1 +291 1 +292 1 296 1 -224 4 -272 4 +298 9 +30 1 302 1 -386 1 -392 1 -242 4 -395 4 -233 4 -278 4 -362 1 -239 4 -200 4 -260 1 -287 1 -341 1 305 1 -344 4 -209 4 -281 4 -35 9 -323 1 -368 1 -203 4 -266 1 -365 1 -317 4 -230 25 -218 1 -338 1 +306 1 +307 4 +308 1 +309 4 +310 1 311 9 +315 1 +316 9 +317 4 +318 9 +321 4 +322 4 +323 1 +325 4 +327 9 +33 1 +331 4 +332 1 +333 4 335 1 -284 1 +336 1 +338 1 +339 1 +34 1 +341 1 +342 4 +344 4 +345 1 +348 25 +35 9 +351 1 353 4 -26 4 -275 1 -257 1 356 1 -308 1 -377 1 -332 1 -248 1 -263 1 -221 4 +360 1 +362 1 +364 1 +365 1 +366 1 +367 4 +368 1 +369 9 +37 4 +373 1 374 1 +375 1 +377 1 +378 1 +379 1 +382 4 +384 9 +386 1 +389 1 +392 1 +393 1 +394 1 +395 4 +396 9 +397 4 +399 4 diff --git a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out index fec384f..db233db 100644 --- a/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out +++ b/ql/src/test/results/clientpositive/spark/vector_string_concat.q.out @@ -272,12 +272,14 @@ PREHOOK: query: EXPLAIN SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) + ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) + ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY STAGE DEPENDENCIES: @@ -289,6 +291,7 @@ STAGE PLANS: Spark Edges: Reducer 2 <- Map 1 (GROUP, 3) + Reducer 3 <- Reducer 2 (SORT, 1) #### A masked pattern was here #### Vertices: Map 1 @@ -318,20 +321,27 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE - Limit - Number of rows: 50 + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 1000 Data size: 459356 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 50 + Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 50 Data size: 22950 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized Stage: Stage-0 @@ -343,6 +353,7 @@ STAGE PLANS: PREHOOK: query: SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) + ORDER BY `field` LIMIT 50 PREHOOK: type: QUERY PREHOOK: Input: default@vectortab2korc @@ -350,57 +361,58 @@ PREHOOK: Input: default@vectortab2korc POSTHOOK: query: SELECT CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) AS `field` FROM vectortab2korc GROUP BY CONCAT(CONCAT(CONCAT('Quarter ',CAST(CAST((MONTH(dt) - 1) / 3 + 1 AS INT) AS STRING)),'-'),CAST(YEAR(dt) AS STRING)) + ORDER BY `field` LIMIT 50 POSTHOOK: type: QUERY POSTHOOK: Input: default@vectortab2korc #### A masked pattern was here #### -Quarter 3-2051 -Quarter 3-2045 -Quarter 4-2095 -Quarter 1-2020 -Quarter 3-2042 -Quarter 2-1971 -Quarter 1-2044 -Quarter 4-1987 -Quarter 1-2047 -Quarter 1-2101 -Quarter 4-2080 -Quarter 3-2087 -Quarter 4-2047 -Quarter 2-2061 -Quarter 2-2103 +NULL +Quarter 1-1970 +Quarter 1-1971 +Quarter 1-1972 +Quarter 1-1973 +Quarter 1-1974 +Quarter 1-1975 +Quarter 1-1976 +Quarter 1-1977 Quarter 1-1978 -Quarter 4-1984 -Quarter 3-1982 -Quarter 2-2001 -Quarter 3-2024 -Quarter 2-2076 -Quarter 1-2074 -Quarter 3-2105 -Quarter 4-1999 -Quarter 3-2072 -Quarter 3-2000 -Quarter 2-2040 -Quarter 4-1972 -Quarter 4-2041 -Quarter 1-2026 -Quarter 3-2015 -Quarter 1-2041 -Quarter 3-1979 -Quarter 3-1970 -Quarter 3-2066 -Quarter 1-2077 -Quarter 4-2059 -Quarter 1-2086 -Quarter 3-2030 -Quarter 2-2067 -Quarter 4-2065 -Quarter 3-2021 -Quarter 3-2012 -Quarter 2-2037 +Quarter 1-1979 +Quarter 1-1980 +Quarter 1-1981 +Quarter 1-1982 +Quarter 1-1983 +Quarter 1-1984 +Quarter 1-1985 +Quarter 1-1986 Quarter 1-1987 -Quarter 4-2014 -Quarter 4-2038 -Quarter 4-1975 -Quarter 1-2053 -Quarter 4-2068 +Quarter 1-1988 +Quarter 1-1989 +Quarter 1-1990 +Quarter 1-1991 +Quarter 1-1992 +Quarter 1-1993 +Quarter 1-1994 +Quarter 1-1995 +Quarter 1-1996 +Quarter 1-1997 +Quarter 1-1998 +Quarter 1-1999 +Quarter 1-2000 +Quarter 1-2001 +Quarter 1-2002 +Quarter 1-2003 +Quarter 1-2004 +Quarter 1-2005 +Quarter 1-2006 +Quarter 1-2007 +Quarter 1-2008 +Quarter 1-2009 +Quarter 1-2010 +Quarter 1-2011 +Quarter 1-2012 +Quarter 1-2013 +Quarter 1-2014 +Quarter 1-2015 +Quarter 1-2016 +Quarter 1-2017 +Quarter 1-2018