Uploaded image for project: 'IMPALA'
  1. IMPALA
  2. IMPALA-4302

In-predicate filters do not scale as expected with the number of elements in a constant IN-list

    Details

      Description

      ExprContext::GetValue is called for every item in a constant IN-list on every row, which is not needed. This makes IN-list predicates >10x slower than an equivalent filter through a RuntimeFilter BloomFilter, and query run time grows linearly with the number of items in the constant IN-list.

      The code below is responsible for the slowdown; getting the value of each item in the list for every row is unnecessary:

      // Evaluates every child expression of this call against 'row' and stores each
      // result as an AnyVal. Children located before vararg_start_idx_ (or all children
      // when vararg_start_idx_ is -1) are written to (*input_vals)[i]; the remaining
      // children are packed back to back into the FunctionContext's varargs buffer.
      //
      // Every child is re-evaluated on each invocation, including children that are
      // constants (e.g. the items of a constant IN-list), which is the per-row cost
      // described in this issue.
      void ScalarFnCall::EvaluateChildren(ExprContext* context, const TupleRow* row,
                                          vector<AnyVal*>* input_vals) {
        // The caller must have sized 'input_vals' to exactly NumFixedArgs() entries.
        DCHECK_EQ(input_vals->size(), NumFixedArgs());
        FunctionContext* fn_ctx = context->fn_context(fn_context_index_);
        // Write cursor into the varargs buffer; advanced as vararg values are stored.
        uint8_t* varargs_buffer = fn_ctx->impl()->varargs_buffer();
        for (int i = 0; i < children_.size(); ++i) {
          // GetValue() runs for each child on every call, even if the child is constant.
          void* src_slot = context->GetValue(children_[i], row);
          AnyVal* dst_val;
          if (vararg_start_idx_ == -1 || i < vararg_start_idx_) {
            // No varargs at all, or this child precedes the first vararg: use the
            // caller-provided AnyVal at the same index.
            dst_val = (*input_vals)[i];
          } else {
            // Vararg: place the value at the cursor, then advance the cursor by the
            // AnyVal size for this child's type so the next vararg follows it directly.
            dst_val = reinterpret_cast<AnyVal*>(varargs_buffer);
            varargs_buffer += AnyValUtil::AnyValSize(children_[i]->type());
          }
          // Convert the raw slot value into the destination AnyVal for the child's type.
          AnyValUtil::SetAnyVal(src_slot, children_[i]->type(), dst_val);
        }
      }
      

      Repro

      create table in_list_keys stored as parquet as  select distinct c_custkey from customer where c_custkey in (1,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101,106,111,116,121,126,131,136,141,146,151,156,161,166,171,176,181,186,191,196,201,206,211,216,221,226,231,236,241,246,251,256,261,266,271,276,281,286,291,296,301,306,311,316,321,326,331,336,341,346,351,356,361,366,371,376,381,386,391,396,401,406,411,416,421,426,431,436,441,446,451,456,461,466,471,476,481,486,491,496,501,506,511,516,521,526,531,536,541,546,551,556,561,566,571,576,581,586,591,596,601,606,611,616,621,626,631,636,641,646,651,656,661,666,671,676,681,686,691,696,701,706,711,716,721,726,731,736,741,746,751,756,761,766,771,776,781,786,791,796,801,806,811,816,821,826,831,836,841,846,851,856,861,866,871,876,881,886,891,896,901,906,911,916,921,926,931,936,941,946,951,956,961,966,971,976,981,986,991,996,1001,1006,1011,1016,1021,1026,1031,1036,1041,1046,1051,1056,1061,1066,1071,1076,1081,1086,1091,1096,1101,1106,1111,1116,1121,1126,1131,1136,1141,1146,1151,1156,1161,1166,1171,1176,1181,1186,1191,1196,1201,1206,1211,1216,1221,1226,1231,1236,1241,1246,1251,1256,1261,1266,1271,1276,1281,1286,1291,1296,1301,1306,1311,1316,1321,1326,1331,1336,1341,1346,1351,1356,1361,1366,1371,1376,1381,1386,1391,1396,1401,1406,1411,1416,1421,1426,1431,1436,1441,1446,1451,1456,1461,1466,1471,1476,1481,1486,1491,1496,1501,1506,1511,1516,1521,1526,1531,1536,1541,1546,1551,1556,1561,1566,1571,1576,1581,1586,1591,1596,1601,1606,1611,1616,1621,1626,1631,1636,1641,1646,1651,1656,1661,1666,1671,1676,1681,1686,1691,1696,1701,1706,1711,1716,1721,1726,1731,1736,1741,1746,1751,1756,1761,1766,1771,1776,1781,1786,1791,1796,1801,1806,1811,1816,1821,1826,1831,1836,1841,1846,1851,1856,1861,1866,1871,1876,1881,1886,1891,1896,1901,1906,1911,1916,1921,1926,1931,1936,1941,1946,1951,1956,1961,1966,1971,1976,1981,1986,1991,1996,2001,2006,2011,2016,2021,2026,2031,2036,2041,2046,2051,2056,2061,2066,2071,2076,2081,2086,2091,2096,2101,2106,2111,2116,2121,2126,2131,2136,2141,2146,2151,2156,2161,2166,2171,2176,2181,2186,2191,2196,2201,2206,2211,2216,2221,2226,2231,2236,2241,2246,2251,2256,2261,2266,2271,2276,2281,2286);
      compute stats in_list_keys;
      

      Slow IN-list query, which finished in 23 seconds over 288 million rows of lineitem

      select count(*) from lineitem where l_partkey in (1,6,11,16,21,26,31,36,41,46,51,56,61,66,71,76,81,86,91,96,101,106,111,116,121,126,131,136,141,146,151,156,161,166,171,176,181,186,191,196,201,206,211,216,221,226,231,236,241,246,251,256,261,266,271,276,281,286,291,296,301,306,311,316,321,326,331,336,341,346,351,356,361,366,371,376,381,386,391,396,401,406,411,416,421,426,431,436,441,446,451,456,461,466,471,476,481,486,491,496,501,506,511,516,521,526,531,536,541,546,551,556,561,566,571,576,581,586,591,596,601,606,611,616,621,626,631,636,641,646,651,656,661,666,671,676,681,686,691,696,701,706,711,716,721,726,731,736,741,746,751,756,761,766,771,776,781,786,791,796,801,806,811,816,821,826,831,836,841,846,851,856,861,866,871,876,881,886,891,896,901,906,911,916,921,926,931,936,941,946,951,956,961,966,971,976,981,986,991,996,1001,1006,1011,1016,1021,1026,1031,1036,1041,1046,1051,1056,1061,1066,1071,1076,1081,1086,1091,1096,1101,1106,1111,1116,1121,1126,1131,1136,1141,1146,1151,1156,1161,1166,1171,1176,1181,1186,1191,1196,1201,1206,1211,1216,1221,1226,1231,1236,1241,1246,1251,1256,1261,1266,1271,1276,1281,1286,1291,1296,1301,1306,1311,1316,1321,1326,1331,1336,1341,1346,1351,1356,1361,1366,1371,1376,1381,1386,1391,1396,1401,1406,1411,1416,1421,1426,1431,1436,1441,1446,1451,1456,1461,1466,1471,1476,1481,1486,1491,1496,1501,1506,1511,1516,1521,1526,1531,1536,1541,1546,1551,1556,1561,1566,1571,1576,1581,1586,1591,1596,1601,1606,1611,1616,1621,1626,1631,1636,1641,1646,1651,1656,1661,1666,1671,1676,1681,1686,1691,1696,1701,1706,1711,1716,1721,1726,1731,1736,1741,1746,1751,1756,1761,1766,1771,1776,1781,1786,1791,1796,1801,1806,1811,1816,1821,1826,1831,1836,1841,1846,1851,1856,1861,1866,1871,1876,1881,1886,1891,1896,1901,1906,1911,1916,1921,1926,1931,1936,1941,1946,1951,1956,1961,1966,1971,1976,1981,1986,1991,1996,2001,2006,2011,2016,2021,2026,2031,2036,2041,2046,2051,2056,2061,2066,2071,2076,2081,2086,2091,2096,2101,2106,2111,2116,2121,2126,2131,2136,2141,2146,2151,2156,2161,2166,2171,2176,2181,2186,2191,2196,2201,2206,2211,2216,2221,2226,2231,2236,2241,2246,2251,2256,2261,2266,2271,2276,2281,2286);
      

      A much faster query, which uses a BloomFilter in the scan operator, takes 2s769ms.

      select count(*) from lineitem_large ,in_list_keys ik where l_partkey=c_custkey;
      

      Profile snippet for scan with runtime filters

              HDFS_SCAN_NODE (id=0):(Total: 2s769ms, non-child: 2s769ms, % non-child: 100.00%)
                ExecOption: Expr Evaluation Codegen Disabled, PARQUET Codegen Enabled, Codegen enabled: 44 out of 44
                Hdfs split stats (<volume id>:<# splits>/<split lengths>): 0:44/9.95 GB 
                Runtime filters: All filters arrived. Waited 0
                Hdfs Read Thread Concurrency Bucket: 0:87.5% 1:12.5% 2:0% 3:0% 4:0% 
                File Formats: PARQUET/SNAPPY:44 
                BytesRead(500.000ms): 90.83 MB, 260.69 MB, 453.00 MB, 648.54 MB, 834.41 MB, 1.02 GB, 1.18 GB, 1.34 GB
                 - AverageHdfsReadThreadConcurrency: 0.12 
                 - AverageScannerThreadConcurrency: 3.62 
                 - BytesRead: 1.34 GB (1433770420)
                 - BytesReadDataNodeCache: 0
                 - BytesReadLocal: 1.34 GB (1433770420)
                 - BytesReadRemoteUnexpected: 0
                 - BytesReadShortCircuit: 1.34 GB (1433770420)
                 - DecompressionTime: 2s702ms
                 - MaxCompressedTextFileLength: 0
                 - NumColumns: 1 (1)
                 - NumDisksAccessed: 1 (1)
                 - NumRowGroups: 44 (44)
                 - NumScannerThreadsStarted: 4 (4)
                 - PeakMemoryUsage: 163.15 MB (171075512)
                 - PerReadThreadRawHdfsThroughput: 3.02 GB/sec
                 - RemoteScanRanges: 0 (0)
                 - RowBatchQueueGetWaitTime: 2s762ms
                 - RowBatchQueuePutWaitTime: 0.000ns
                 - RowsRead: 288.00M (288001184)
                 - RowsReturned: 109.31K (109312)
                 - RowsReturnedRate: 39.47 K/sec
                 - ScanRangesComplete: 44 (44)
                 - ScannerThreadsInvoluntaryContextSwitches: 906 (906)
                 - ScannerThreadsTotalWallClockTime: 13s869ms
                   - MaterializeTupleTime(*): 10s721ms
                   - ScannerThreadsSysTime: 731.018ms
                   - ScannerThreadsUserTime: 12s779ms
                 - ScannerThreadsVoluntaryContextSwitches: 274 (274)
                 - TotalRawHdfsReadTime(*): 442.479ms
                 - TotalReadThroughput: 341.84 MB/sec
                Filter 0 (1.00 MB):
                   - Rows processed: 288.00M (288001184)
                   - Rows rejected: 287.89M (287891872)
                   - Rows total: 288.00M (288001184)
      

      Profile snippet for scan with slow in-predicate

      HDFS_SCAN_NODE (id=0):(Total: 22s469ms, non-child: 22s469ms, % non-child: 100.00%)
                ExecOption: Expr Evaluation Codegen Enabled, PARQUET Codegen Enabled, Codegen enabled: 44 out of 44
                Hdfs split stats (<volume id>:<# splits>/<split lengths>): 0:44/9.95 GB 
                Hdfs Read Thread Concurrency Bucket: 0:97.83% 1:2.174% 2:0% 3:0% 4:0% 
                File Formats: PARQUET/SNAPPY:44 
                BytesRead(500.000ms): 0, 137.86 MB, 137.86 MB, 137.86 MB, 137.86 MB, 249.11 MB, 249.11 MB, 249.11 MB, 308.89 MB, 376.81 MB, 376.81 MB, 410.85 MB, 444.79 MB, 512.72 MB, 512.72 MB, 546.69 MB, 546.69 MB, 546.69 MB, 648.54 MB, 648.54 MB, 682.52 MB, 682.52 MB, 750.52 MB, 784.45 MB, 805.28 MB, 805.28 MB, 873.21 MB, 929.70 MB, 929.70 MB, 937.80 MB, 989.49 MB, 1.03 GB, 1.03 GB, 1.03 GB, 1.04 GB, 1.17 GB, 1.17 GB, 1.17 GB, 1.17 GB, 1.20 GB, 1.24 GB, 1.27 GB, 1.30 GB, 1.34 GB, 1.34 GB, 1.34 GB, 1.34 GB
                 - AverageHdfsReadThreadConcurrency: 0.02 
                 - AverageScannerThreadConcurrency: 3.93 
                 - BytesRead: 1.34 GB (1433770420)
                 - BytesReadDataNodeCache: 0
                 - BytesReadLocal: 1.34 GB (1433770420)
                 - BytesReadRemoteUnexpected: 0
                 - BytesReadShortCircuit: 1.34 GB (1433770420)
                 - DecompressionTime: 2s549ms
                 - MaxCompressedTextFileLength: 0
                 - NumColumns: 1 (1)
                 - NumDisksAccessed: 1 (1)
                 - NumRowGroups: 44 (44)
                 - NumScannerThreadsStarted: 4 (4)
                 - PeakMemoryUsage: 167.18 MB (175303488)
                 - PerReadThreadRawHdfsThroughput: 570.71 MB/sec
                 - RemoteScanRanges: 0 (0)
                 - RowBatchQueueGetWaitTime: 22s454ms
                 - RowBatchQueuePutWaitTime: 0.000ns
                 - RowsRead: 288.00M (288001184)
                 - RowsReturned: 109.31K (109312)
                 - RowsReturnedRate: 4.86 K/sec
                 - ScanRangesComplete: 44 (44)
                 - ScannerThreadsInvoluntaryContextSwitches: 6.02K (6019)
                 - ScannerThreadsTotalWallClockTime: 1m31s
                   - MaterializeTupleTime(*): 1m27s
                   - ScannerThreadsSysTime: 939.121ms
                   - ScannerThreadsUserTime: 1m29s
                 - ScannerThreadsVoluntaryContextSwitches: 185 (185)
                 - TotalRawHdfsReadTime(*): 2s395ms
                 - TotalReadThroughput: 58.19 MB/sec
      

        Attachments

          Issue Links

            Activity

              People

              • Assignee:
                tarmstrong Tim Armstrong
                Reporter:
                mmokhtar Mostafa Mokhtar
              • Votes:
                0 Vote for this issue
                Watchers:
                5 Start watching this issue

                Dates

                • Created:
                  Updated:
                  Resolved: