Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Fixed
-
Impala 2.5.0
Description
ExprContext::GetValue is called for every item in a constant IN-list, which is not needed. This makes IN-list predicates more than 10x slower than an equivalent filter through a RuntimeFilter BloomFilter, and query run time grows linearly with the number of items in the constant IN-list.
The code below is responsible for the slowdown: getting the value of each item in the list per row is unnecessary.
// Evaluates every child expression of this function call against 'row' and stores
// each result as an AnyVal.
//   context    - supplies the FunctionContext (looked up via fn_context_index_) and
//                performs the per-child evaluation through GetValue().
//   row        - the row passed to each child's evaluation.
//   input_vals - destination AnyVals for the fixed arguments; its size must equal
//                NumFixedArgs() (enforced by the DCHECK below).
// Variadic arguments are not written to 'input_vals'; they are written back-to-back
// into the FunctionContext's varargs buffer.
void ScalarFnCall::EvaluateChildren(ExprContext* context, const TupleRow* row, vector<AnyVal*>* input_vals) {
  DCHECK_EQ(input_vals->size(), NumFixedArgs());
  FunctionContext* fn_ctx = context->fn_context(fn_context_index_);
  // Write cursor into the varargs buffer; advanced past each vararg as it is stored.
  uint8_t* varargs_buffer = fn_ctx->impl()->varargs_buffer();
  for (int i = 0; i < children_.size(); ++i) {
    // GetValue() runs for every child on every call to this function; nothing here
    // skips children whose value cannot change between rows. This is the call the
    // report identifies as the source of the slowdown.
    void* src_slot = context->GetValue(children_[i], row);
    AnyVal* dst_val;
    if (vararg_start_idx_ == -1 || i < vararg_start_idx_) {
      // Fixed argument (vararg_start_idx_ == -1 means there are no varargs at all).
      dst_val = (*input_vals)[i];
    } else {
      // Vararg: place it at the cursor, then advance by the AnyVal size for its type.
      dst_val = reinterpret_cast<AnyVal*>(varargs_buffer);
      varargs_buffer += AnyValUtil::AnyValSize(children_[i]->type());
    }
    // Copy the evaluated slot value into the chosen destination AnyVal.
    AnyValUtil::SetAnyVal(src_slot, children_[i]->type(), dst_val);
  }
}
Repro
create table in_list_keys stored as parquet as select distinct c_custkey from customer where c_custkey in (1,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101,106,111,116,121,126,131,136,141,146,151,156,161,166,171,176,181,186,191,196,201,206,211,216,221,226,231,236,241,246,251,256,261,266,271,276,281,286,291,296,301,306,311,316,321,326,331,336,341,346,351,356,361,366,371,376,381,386,391,396,401,406,411,416,421,426,431,436,441,446,451,456,461,466,471,476,481,486,491,496,501,506,511,516,521,526,531,536,541,546,551,556,561,566,571,576,581,586,591,596,601,606,611,616,621,626,631,636,641,646,651,656,661,666,671,676,681,686,691,696,701,706,711,716,721,726,731,736,741,746,751,756,761,766,771,776,781,786,791,796,801,806,811,816,821,826,831,836,841,846,851,856,861,866,871,876,881,886,891,896,901,906,911,916,921,926,931,936,941,946,951,956,961,966,971,976,981,986,991,996,1001,1006,1011,1016,1021,1026,1031,1036,1041,1046,1051,1056,1061,1066,1071,1076,1081,1086,1091,1096,1101,1106,1111,1116,1121,1126,1131,1136,1141,1146,1151,1156,1161,1166,1171,1176,1181,1186,1191,1196,1201,1206,1211,1216,1221,1226,1231,1236,1241,1246,1251,1256,1261,1266,1271,1276,1281,1286,1291,1296,1301,1306,1311,1316,1321,1326,1331,1336,1341,1346,1351,1356,1361,1366,1371,1376,1381,1386,1391,1396,1401,1406,1411,1416,1421,1426,1431,1436,1441,1446,1451,1456,1461,1466,1471,1476,1481,1486,1491,1496,1501,1506,1511,1516,1521,1526,1531,1536,1541,1546,1551,1556,1561,1566,1571,1576,1581,1586,1591,1596,1601,1606,1611,1616,1621,1626,1631,1636,1641,1646,1651,1656,1661,1666,1671,1676,1681,1686,1691,1696,1701,1706,1711,1716,1721,1726,1731,1736,1741,1746,1751,1756,1761,1766,1771,1776,1781,1786,1791,1796,1801,1806,1811,1816,1821,1826,1831,1836,1841,1846,1851,1856,1861,1866,1871,1876,1881,1886,1891,1896,1901,1906,1911,1916,1921,1926,1931,1936,1941,1946,1951,1956,1961,1966,1971,1976,1981,1986,1991,1996,2001,2006,2011,2016,2021,2026,2031,2036,2041,2046,2051,2056,2061,2066,2071,2076,2081,2086,2091,2096,2101,2106,2111,
2116,2121,2126,2131,2136,2141,2146,2151,2156,2161,2166,2171,2176,2181,2186,2191,2196,2201,2206,2211,2216,2221,2226,2231,2236,2241,2246,2251,2256,2261,2266,2271,2276,2281,2286); compute stats in_list_keys;
Slow IN-list query, which finished in 23 seconds for 288 million rows of lineitem:
select count(*) from lineitem where l_partkey in (1,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101,106,111,116,121,126,131,136,141,146,151,156,161,166,171,176,181,186,191,196,201,206,211,216,221,226,231,236,241,246,251,256,261,266,271,276,281,286,291,296,301,306,311,316,321,326,331,336,341,346,351,356,361,366,371,376,381,386,391,396,401,406,411,416,421,426,431,436,441,446,451,456,461,466,471,476,481,486,491,496,501,506,511,516,521,526,531,536,541,546,551,556,561,566,571,576,581,586,591,596,601,606,611,616,621,626,631,636,641,646,651,656,661,666,671,676,681,686,691,696,701,706,711,716,721,726,731,736,741,746,751,756,761,766,771,776,781,786,791,796,801,806,811,816,821,826,831,836,841,846,851,856,861,866,871,876,881,886,891,896,901,906,911,916,921,926,931,936,941,946,951,956,961,966,971,976,981,986,991,996,1001,1006,1011,1016,1021,1026,1031,1036,1041,1046,1051,1056,1061,1066,1071,1076,1081,1086,1091,1096,1101,1106,1111,1116,1121,1126,1131,1136,1141,1146,1151,1156,1161,1166,1171,1176,1181,1186,1191,1196,1201,1206,1211,1216,1221,1226,1231,1236,1241,1246,1251,1256,1261,1266,1271,1276,1281,1286,1291,1296,1301,1306,1311,1316,1321,1326,1331,1336,1341,1346,1351,1356,1361,1366,1371,1376,1381,1386,1391,1396,1401,1406,1411,1416,1421,1426,1431,1436,1441,1446,1451,1456,1461,1466,1471,1476,1481,1486,1491,1496,1501,1506,1511,1516,1521,1526,1531,1536,1541,1546,1551,1556,1561,1566,1571,1576,1581,1586,1591,1596,1601,1606,1611,1616,1621,1626,1631,1636,1641,1646,1651,1656,1661,1666,1671,1676,1681,1686,1691,1696,1701,1706,1711,1716,1721,1726,1731,1736,1741,1746,1751,1756,1761,1766,1771,1776,1781,1786,1791,1796,1801,1806,1811,1816,1821,1826,1831,1836,1841,1846,1851,1856,1861,1866,1871,1876,1881,1886,1891,1896,1901,1906,1911,1916,1921,1926,1931,1936,1941,1946,1951,1956,1961,1966,1971,1976,1981,1986,1991,1996,2001,2006,2011,2016,2021,2026,2031,2036,2041,2046,2051,2056,2061,2066,2071,2076,2081,2086,2091,2096,2101,2106,2111,2116,2121,2126,2131,2136,2141,2146,2151,2156,2161,2166,
2171,2176,2181,2186,2191,2196,2201,2206,2211,2216,2221,2226,2231,2236,2241,2246,2251,2256,2261,2266,2271,2276,2281,2286);
Much faster query, which uses a BloomFilter in the scan operator and takes 2s769ms:
select count(*) from lineitem_large ,in_list_keys ik where l_partkey=c_custkey;
Profile snippet for scan with runtime filters
HDFS_SCAN_NODE (id=0):(Total: 2s769ms, non-child: 2s769ms, % non-child: 100.00%)
ExecOption: Expr Evaluation Codegen Disabled, PARQUET Codegen Enabled, Codegen enabled: 44 out of 44
Hdfs split stats (<volume id>:<# splits>/<split lengths>): 0:44/9.95 GB
Runtime filters: All filters arrived. Waited 0
Hdfs Read Thread Concurrency Bucket: 0:87.5% 1:12.5% 2:0% 3:0% 4:0%
File Formats: PARQUET/SNAPPY:44
BytesRead(500.000ms): 90.83 MB, 260.69 MB, 453.00 MB, 648.54 MB, 834.41 MB, 1.02 GB, 1.18 GB, 1.34 GB
- AverageHdfsReadThreadConcurrency: 0.12
- AverageScannerThreadConcurrency: 3.62
- BytesRead: 1.34 GB (1433770420)
- BytesReadDataNodeCache: 0
- BytesReadLocal: 1.34 GB (1433770420)
- BytesReadRemoteUnexpected: 0
- BytesReadShortCircuit: 1.34 GB (1433770420)
- DecompressionTime: 2s702ms
- MaxCompressedTextFileLength: 0
- NumColumns: 1 (1)
- NumDisksAccessed: 1 (1)
- NumRowGroups: 44 (44)
- NumScannerThreadsStarted: 4 (4)
- PeakMemoryUsage: 163.15 MB (171075512)
- PerReadThreadRawHdfsThroughput: 3.02 GB/sec
- RemoteScanRanges: 0 (0)
- RowBatchQueueGetWaitTime: 2s762ms
- RowBatchQueuePutWaitTime: 0.000ns
- RowsRead: 288.00M (288001184)
- RowsReturned: 109.31K (109312)
- RowsReturnedRate: 39.47 K/sec
- ScanRangesComplete: 44 (44)
- ScannerThreadsInvoluntaryContextSwitches: 906 (906)
- ScannerThreadsTotalWallClockTime: 13s869ms
- MaterializeTupleTime(*): 10s721ms
- ScannerThreadsSysTime: 731.018ms
- ScannerThreadsUserTime: 12s779ms
- ScannerThreadsVoluntaryContextSwitches: 274 (274)
- TotalRawHdfsReadTime(*): 442.479ms
- TotalReadThroughput: 341.84 MB/sec
Filter 0 (1.00 MB):
- Rows processed: 288.00M (288001184)
- Rows rejected: 287.89M (287891872)
- Rows total: 288.00M (288001184)
Profile snippet for scan with slow in-predicate
HDFS_SCAN_NODE (id=0):(Total: 22s469ms, non-child: 22s469ms, % non-child: 100.00%)
ExecOption: Expr Evaluation Codegen Enabled, PARQUET Codegen Enabled, Codegen enabled: 44 out of 44
Hdfs split stats (<volume id>:<# splits>/<split lengths>): 0:44/9.95 GB
Hdfs Read Thread Concurrency Bucket: 0:97.83% 1:2.174% 2:0% 3:0% 4:0%
File Formats: PARQUET/SNAPPY:44
BytesRead(500.000ms): 0, 137.86 MB, 137.86 MB, 137.86 MB, 137.86 MB, 249.11 MB, 249.11 MB, 249.11 MB, 308.89 MB, 376.81 MB, 376.81 MB, 410.85 MB, 444.79 MB, 512.72 MB, 512.72 MB, 546.69 MB, 546.69 MB, 546.69 MB, 648.54 MB, 648.54 MB, 682.52 MB, 682.52 MB, 750.52 MB, 784.45 MB, 805.28 MB, 805.28 MB, 873.21 MB, 929.70 MB, 929.70 MB, 937.80 MB, 989.49 MB, 1.03 GB, 1.03 GB, 1.03 GB, 1.04 GB, 1.17 GB, 1.17 GB, 1.17 GB, 1.17 GB, 1.20 GB, 1.24 GB, 1.27 GB, 1.30 GB, 1.34 GB, 1.34 GB, 1.34 GB, 1.34 GB
- AverageHdfsReadThreadConcurrency: 0.02
- AverageScannerThreadConcurrency: 3.93
- BytesRead: 1.34 GB (1433770420)
- BytesReadDataNodeCache: 0
- BytesReadLocal: 1.34 GB (1433770420)
- BytesReadRemoteUnexpected: 0
- BytesReadShortCircuit: 1.34 GB (1433770420)
- DecompressionTime: 2s549ms
- MaxCompressedTextFileLength: 0
- NumColumns: 1 (1)
- NumDisksAccessed: 1 (1)
- NumRowGroups: 44 (44)
- NumScannerThreadsStarted: 4 (4)
- PeakMemoryUsage: 167.18 MB (175303488)
- PerReadThreadRawHdfsThroughput: 570.71 MB/sec
- RemoteScanRanges: 0 (0)
- RowBatchQueueGetWaitTime: 22s454ms
- RowBatchQueuePutWaitTime: 0.000ns
- RowsRead: 288.00M (288001184)
- RowsReturned: 109.31K (109312)
- RowsReturnedRate: 4.86 K/sec
- ScanRangesComplete: 44 (44)
- ScannerThreadsInvoluntaryContextSwitches: 6.02K (6019)
- ScannerThreadsTotalWallClockTime: 1m31s
- MaterializeTupleTime(*): 1m27s
- ScannerThreadsSysTime: 939.121ms
- ScannerThreadsUserTime: 1m29s
- ScannerThreadsVoluntaryContextSwitches: 185 (185)
- TotalRawHdfsReadTime(*): 2s395ms
- TotalReadThroughput: 58.19 MB/sec
Attachments
Issue Links
- breaks
-
IMPALA-6873 Crash in Expr::GetConstVal() due to NULL dereference
- Resolved