diff --git a/common/pom.xml b/common/pom.xml index e6722babd8..e5f4c6a5fe 100644 --- a/common/pom.xml +++ b/common/pom.xml @@ -227,6 +227,16 @@ <artifactId>dropwizard-metrics-hadoop-metrics2-reporter</artifactId> <version>${dropwizard-metrics-hadoop-metrics2-reporter.version}</version> </dependency> + <dependency> + <groupId>javolution</groupId> + <artifactId>javolution</artifactId> + <version>${javolution.version}</version> + </dependency> + <dependency> + <groupId>it.unimi.dsi</groupId> + <artifactId>fastutil</artifactId> + <version>${fastutil.version}</version> + </dependency> diff --git a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java index 7c9d72fbd2..70cc8caae0 100644 --- a/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java +++ b/common/src/java/org/apache/hadoop/hive/common/HiveStatsUtils.java @@ -83,7 +83,8 @@ public static int getNumBitVectorsForNDVEstimation(Configuration conf) throws Ex float percentageError = HiveConf.getFloatVar(conf, HiveConf.ConfVars.HIVE_STATS_NDV_ERROR); if (percentageError < 0.0) { - throw new Exception("hive.stats.ndv.error can't be negative"); + numBitVectors = -1; + LOG.info("hive.stats.ndv.error is negative. Set numBitVectors as -1 and hyperloglog will be used."); } else if (percentageError <= 2.4) { numBitVectors = 1024; LOG.info("Lowest error achievable is 2.4% but error requested is " + percentageError + "%"); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java b/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java similarity index 91% rename from ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java rename to common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java index fa70f49857..d70efea784 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/NumDistinctValueEstimator.java +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.hadoop.hive.ql.udf.generic; +package org.apache.hadoop.hive.common.ndv; import java.util.Random; import javolution.util.FastBitSet; @@ -27,9 +27,9 @@ import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.io.Text; -public class NumDistinctValueEstimator { +public class FMSketch extends NumDistinctValueEstimator{ - static final Logger LOG = LoggerFactory.getLogger(NumDistinctValueEstimator.class.getName()); + static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName()); /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. * @@ -38,7 +38,6 @@ * thus introducing errors in the estimates.
*/ private static final int BIT_VECTOR_SIZE = 31; - private final int numBitVectors; // Refer to Flajolet-Martin'86 for the value of phi private static final double PHI = 0.77351; @@ -52,7 +51,7 @@ /* Create a new distinctValueEstimator */ - public NumDistinctValueEstimator(int numBitVectors) { + public FMSketch(int numBitVectors) { this.numBitVectors = numBitVectors; bitVector = new FastBitSet[numBitVectors]; for (int i=0; i< numBitVectors; i++) { @@ -109,9 +108,9 @@ public NumDistinctValueEstimator(int numBitVectors) { } } - public NumDistinctValueEstimator(String s, int numBitVectors) { + public FMSketch(String s, int numBitVectors) { this.numBitVectors = numBitVectors; - FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors); + FastBitSet bitVectorDeser[] = genBitSet(s, numBitVectors); bitVector = new FastBitSet[numBitVectors]; for(int i=0; i 0) { + return new FMSketch(numBitVectors); + } else { + return null; + } + } + + public static NumDistinctValueEstimator getNumDistinctValueEstimator(String s, int numBitVectors) { + if (numBitVectors == -1) { + return HyperLogLog.builder().build().deserialize(s, numBitVectors); + } else if (numBitVectors > 0) { + return new FMSketch(s, numBitVectors); + } else { + return null; + } + } + +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLConstants.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLConstants.java new file mode 100644 index 0000000000..ded8edd93a --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLConstants.java @@ -0,0 +1,929 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.common.ndv.hll; + +public class HLLConstants { + + // range of register index bits + public static final int MIN_P_VALUE = 4; + public static final int MAX_P_VALUE = 16; + + // number of entries to store before being merged to sparse map + public static final int TEMP_LIST_DEFAULT_SIZE = 1024; + + // constants for SPARSE encoding + public static final int P_PRIME_VALUE = 25; + public static final int Q_PRIME_VALUE = 6; + + // data for HLL++ bias correction + public static final int K_NEAREST_NEIGHBOR = 6; + + public static final double[] thresholdData = { 10, 20, 40, 80, 220, 400, 900, 1800, 3100, 6500, + 15500, 20000, 50000, 120000, 350000 }; + + public static final double[][] rawEstimateData = { + // precision 4 + { 11, 11.717, 12.207, 12.7896, 13.2882, 13.8204, 14.3772, 14.9342, 15.5202, 16.161, 16.7722, + 17.4636, 18.0396, 18.6766, 19.3566, 20.0454, 20.7936, 21.4856, 22.2666, 22.9946, 23.766, + 24.4692, 25.3638, 26.0764, 26.7864, 27.7602, 28.4814, 29.433, 30.2926, 31.0664, 31.9996, + 32.7956, 33.5366, 34.5894, 35.5738, 36.2698, 37.3682, 38.0544, 39.2342, 40.0108, 40.7966, + 41.9298, 42.8704, 43.6358, 44.5194, 45.773, 46.6772, 47.6174, 48.4888, 49.3304, 50.2506, + 51.4996, 52.3824, 53.3078, 54.3984, 55.5838, 56.6618, 57.2174, 58.3514, 59.0802, 60.1482, + 61.0376, 62.3598, 62.8078, 63.9744, 64.914, 65.781, 67.1806, 68.0594, 68.8446, 69.7928, + 70.8248, 71.8324, 72.8598, 73.6246, 74.7014, 75.393, 76.6708, 77.2394, }, + // precision 5 + { 23, 23.1194, 23.8208, 24.2318, 24.77, 25.2436, 25.7774, 26.2848, 26.8224, 27.3742, 27.9336, + 28.503, 29.0494, 29.6292, 30.2124, 30.798, 31.367, 31.9728, 32.5944, 33.217, 33.8438, + 34.3696, 35.0956, 35.7044, 36.324, 37.0668, 37.6698, 38.3644, 39.049, 39.6918, 40.4146, + 41.082, 41.687, 42.5398, 43.2462, 43.857, 44.6606, 45.4168, 46.1248, 46.9222, 47.6804, + 48.447, 49.3454, 49.9594, 50.7636, 51.5776, 52.331, 53.19, 53.9676, 54.7564, 55.5314, + 56.4442, 57.3708, 57.9774, 58.9624, 59.8796, 60.755, 61.472, 62.2076, 63.1024, 63.8908, + 64.7338, 65.7728, 66.629, 67.413, 68.3266, 69.1524, 70.2642, 71.1806, 72.0566, 72.9192, + 73.7598, 74.3516, 75.5802, 76.4386, 77.4916, 78.1524, 79.1892, 79.8414, 80.8798, 81.8376, + 82.4698, 83.7656, 84.331, 85.5914, 86.6012, 87.7016, 88.5582, 89.3394, 90.3544, 91.4912, + 92.308, 93.3552, 93.9746, 95.2052, 95.727, 97.1322, 98.3944, 98.7588, 100.242, 101.1914, + 102.2538, 102.8776, 103.6292, 105.1932, 105.9152, 107.0868, 107.6728, 108.7144, 110.3114, + 110.8716, 111.245, 112.7908, 113.7064, 114.636, 115.7464, 116.1788, 117.7464, 118.4896, + 119.6166, 120.5082, 121.7798, 122.9028, 123.4426, 124.8854, 125.705, 126.4652, 128.3464, + 128.3462, 130.0398, 131.0342, 131.0042, 132.4766, 133.511, 134.7252, 135.425, 136.5172, + 138.0572, 138.6694, 139.3712, 140.8598, 141.4594, 142.554, 143.4006, 144.7374, 146.1634, + 146.8994, 147.605, 147.9304, 149.1636, 150.2468, 151.5876, 152.2096, 153.7032, 154.7146, + 155.807, 156.9228, 157.0372, 158.5852, }, + // precision 6 + { 46, 46.1902, 47.271, 47.8358, 48.8142, 49.2854, 50.317, 51.354, 51.8924, 52.9436, 53.4596, + 54.5262, 55.6248, 56.1574, 57.2822, 57.837, 58.9636, 60.074, 60.7042, 61.7976, 62.4772, + 63.6564, 64.7942, 65.5004, 66.686, 67.291, 68.5672, 69.8556, 70.4982, 71.8204, 72.4252, + 73.7744, 75.0786, 75.8344, 77.0294, 77.8098, 79.0794, 80.5732, 81.1878, 82.5648, 83.2902, + 84.6784, 85.3352, 86.8946, 88.3712, 89.0852, 90.499, 91.2686, 92.6844, 94.2234, 94.9732, + 96.3356, 97.2286, 98.7262, 100.3284, 101.1048, 102.5962, 103.3562, 105.1272, 
106.4184, + 107.4974, 109.0822, 109.856, 111.48, 113.2834, 114.0208, 115.637, 116.5174, 118.0576, + 119.7476, 120.427, 122.1326, 123.2372, 125.2788, 126.6776, 127.7926, 129.1952, 129.9564, + 131.6454, 133.87, 134.5428, 136.2, 137.0294, 138.6278, 139.6782, 141.792, 143.3516, + 144.2832, 146.0394, 147.0748, 148.4912, 150.849, 151.696, 153.5404, 154.073, 156.3714, + 157.7216, 158.7328, 160.4208, 161.4184, 163.9424, 165.2772, 166.411, 168.1308, 168.769, + 170.9258, 172.6828, 173.7502, 175.706, 176.3886, 179.0186, 180.4518, 181.927, 183.4172, + 184.4114, 186.033, 188.5124, 189.5564, 191.6008, 192.4172, 193.8044, 194.997, 197.4548, + 198.8948, 200.2346, 202.3086, 203.1548, 204.8842, 206.6508, 206.6772, 209.7254, 210.4752, + 212.7228, 214.6614, 215.1676, 217.793, 218.0006, 219.9052, 221.66, 223.5588, 225.1636, + 225.6882, 227.7126, 229.4502, 231.1978, 232.9756, 233.1654, 236.727, 238.1974, 237.7474, + 241.1346, 242.3048, 244.1948, 245.3134, 246.879, 249.1204, 249.853, 252.6792, 253.857, + 254.4486, 257.2362, 257.9534, 260.0286, 260.5632, 262.663, 264.723, 265.7566, 267.2566, + 267.1624, 270.62, 272.8216, 273.2166, 275.2056, 276.2202, 278.3726, 280.3344, 281.9284, + 283.9728, 284.1924, 286.4872, 287.587, 289.807, 291.1206, 292.769, 294.8708, 296.665, + 297.1182, 299.4012, 300.6352, 302.1354, 304.1756, 306.1606, 307.3462, 308.5214, 309.4134, + 310.8352, 313.9684, 315.837, 316.7796, 318.9858, }, + // precision 7 + { 92, 93.4934, 94.9758, 96.4574, 97.9718, 99.4954, 101.5302, 103.0756, 104.6374, 106.1782, + 107.7888, 109.9522, 111.592, 113.2532, 114.9086, 116.5938, 118.9474, 120.6796, 122.4394, + 124.2176, 125.9768, 128.4214, 130.2528, 132.0102, 133.8658, 135.7278, 138.3044, 140.1316, + 142.093, 144.0032, 145.9092, 148.6306, 150.5294, 152.5756, 154.6508, 156.662, 159.552, + 161.3724, 163.617, 165.5754, 167.7872, 169.8444, 172.7988, 174.8606, 177.2118, 179.3566, + 181.4476, 184.5882, 186.6816, 189.0824, 191.0258, 193.6048, 196.4436, 198.7274, 200.957, + 203.147, 205.4364, 208.7592, 211.3386, 213.781, 215.8028, 218.656, 221.6544, 223.996, + 226.4718, 229.1544, 231.6098, 234.5956, 237.0616, 239.5758, 242.4878, 244.5244, 248.2146, + 250.724, 252.8722, 255.5198, 258.0414, 261.941, 264.9048, 266.87, 269.4304, 272.028, + 274.4708, 278.37, 281.0624, 283.4668, 286.5532, 289.4352, 293.2564, 295.2744, 298.2118, + 300.7472, 304.1456, 307.2928, 309.7504, 312.5528, 315.979, 318.2102, 322.1834, 324.3494, + 327.325, 330.6614, 332.903, 337.2544, 339.9042, 343.215, 345.2864, 348.0814, 352.6764, + 355.301, 357.139, 360.658, 363.1732, 366.5902, 369.9538, 373.0828, 375.922, 378.9902, + 382.7328, 386.4538, 388.1136, 391.2234, 394.0878, 396.708, 401.1556, 404.1852, 406.6372, + 409.6822, 412.7796, 416.6078, 418.4916, 422.131, 424.5376, 428.1988, 432.211, 434.4502, + 438.5282, 440.912, 444.0448, 447.7432, 450.8524, 453.7988, 456.7858, 458.8868, 463.9886, + 466.5064, 468.9124, 472.6616, 475.4682, 478.582, 481.304, 485.2738, 488.6894, 490.329, + 496.106, 497.6908, 501.1374, 504.5322, 506.8848, 510.3324, 513.4512, 516.179, 520.4412, + 522.6066, 526.167, 528.7794, 533.379, 536.067, 538.46, 542.9116, 545.692, 547.9546, + 552.493, 555.2722, 557.335, 562.449, 564.2014, 569.0738, 571.0974, 574.8564, 578.2996, + 581.409, 583.9704, 585.8098, 589.6528, 594.5998, 595.958, 600.068, 603.3278, 608.2016, + 609.9632, 612.864, 615.43, 620.7794, 621.272, 625.8644, 629.206, 633.219, 634.5154, + 638.6102, }, + // precision 8 + { 184.2152, 187.2454, 190.2096, 193.6652, 196.6312, 199.6822, 203.249, 206.3296, 210.0038, + 213.2074, 216.4612, 220.27, 
223.5178, 227.4412, 230.8032, 234.1634, 238.1688, 241.6074, + 245.6946, 249.2664, 252.8228, 257.0432, 260.6824, 264.9464, 268.6268, 272.2626, 276.8376, + 280.4034, 284.8956, 288.8522, 292.7638, 297.3552, 301.3556, 305.7526, 309.9292, 313.8954, + 318.8198, 322.7668, 327.298, 331.6688, 335.9466, 340.9746, 345.1672, 349.3474, 354.3028, + 358.8912, 364.114, 368.4646, 372.9744, 378.4092, 382.6022, 387.843, 392.5684, 397.1652, + 402.5426, 407.4152, 412.5388, 417.3592, 422.1366, 427.486, 432.3918, 437.5076, 442.509, + 447.3834, 453.3498, 458.0668, 463.7346, 469.1228, 473.4528, 479.7, 484.644, 491.0518, + 495.5774, 500.9068, 506.432, 512.1666, 517.434, 522.6644, 527.4894, 533.6312, 538.3804, + 544.292, 550.5496, 556.0234, 562.8206, 566.6146, 572.4188, 579.117, 583.6762, 590.6576, + 595.7864, 601.509, 607.5334, 612.9204, 619.772, 624.2924, 630.8654, 636.1836, 642.745, + 649.1316, 655.0386, 660.0136, 666.6342, 671.6196, 678.1866, 684.4282, 689.3324, 695.4794, + 702.5038, 708.129, 713.528, 720.3204, 726.463, 732.7928, 739.123, 744.7418, 751.2192, + 756.5102, 762.6066, 769.0184, 775.2224, 781.4014, 787.7618, 794.1436, 798.6506, 805.6378, + 811.766, 819.7514, 824.5776, 828.7322, 837.8048, 843.6302, 849.9336, 854.4798, 861.3388, + 867.9894, 873.8196, 880.3136, 886.2308, 892.4588, 899.0816, 905.4076, 912.0064, 917.3878, + 923.619, 929.998, 937.3482, 943.9506, 947.991, 955.1144, 962.203, 968.8222, 975.7324, + 981.7826, 988.7666, 994.2648, 1000.3128, 1007.4082, 1013.7536, 1020.3376, 1026.7156, + 1031.7478, 1037.4292, 1045.393, 1051.2278, 1058.3434, 1062.8726, 1071.884, 1076.806, + 1082.9176, 1089.1678, 1095.5032, 1102.525, 1107.2264, 1115.315, 1120.93, 1127.252, + 1134.1496, 1139.0408, 1147.5448, 1153.3296, 1158.1974, 1166.5262, 1174.3328, 1175.657, + 1184.4222, 1190.9172, 1197.1292, 1204.4606, 1210.4578, 1218.8728, 1225.3336, 1226.6592, + 1236.5768, 1241.363, 1249.4074, 1254.6566, 1260.8014, 1266.5454, 1274.5192, }, + // precision 9 + { 369, 374.8294, 381.2452, 387.6698, 394.1464, 400.2024, 406.8782, 413.6598, 420.462, + 427.2826, 433.7102, 440.7416, 447.9366, 455.1046, 462.285, 469.0668, 476.306, 483.8448, + 491.301, 498.9886, 506.2422, 513.8138, 521.7074, 529.7428, 537.8402, 545.1664, 553.3534, + 561.594, 569.6886, 577.7876, 585.65, 594.228, 602.8036, 611.1666, 620.0818, 628.0824, + 637.2574, 646.302, 655.1644, 664.0056, 672.3802, 681.7192, 690.5234, 700.2084, 708.831, + 718.485, 728.1112, 737.4764, 746.76, 756.3368, 766.5538, 775.5058, 785.2646, 795.5902, + 804.3818, 814.8998, 824.9532, 835.2062, 845.2798, 854.4728, 864.9582, 875.3292, 886.171, + 896.781, 906.5716, 916.7048, 927.5322, 937.875, 949.3972, 958.3464, 969.7274, 980.2834, + 992.1444, 1003.4264, 1013.0166, 1024.018, 1035.0438, 1046.34, 1057.6856, 1068.9836, + 1079.0312, 1091.677, 1102.3188, 1113.4846, 1124.4424, 1135.739, 1147.1488, 1158.9202, + 1169.406, 1181.5342, 1193.2834, 1203.8954, 1216.3286, 1226.2146, 1239.6684, 1251.9946, + 1262.123, 1275.4338, 1285.7378, 1296.076, 1308.9692, 1320.4964, 1333.0998, 1343.9864, + 1357.7754, 1368.3208, 1380.4838, 1392.7388, 1406.0758, 1416.9098, 1428.9728, 1440.9228, + 1453.9292, 1462.617, 1476.05, 1490.2996, 1500.6128, 1513.7392, 1524.5174, 1536.6322, + 1548.2584, 1562.3766, 1572.423, 1587.1232, 1596.5164, 1610.5938, 1622.5972, 1633.1222, + 1647.7674, 1658.5044, 1671.57, 1683.7044, 1695.4142, 1708.7102, 1720.6094, 1732.6522, + 1747.841, 1756.4072, 1769.9786, 1782.3276, 1797.5216, 1808.3186, 1819.0694, 1834.354, + 1844.575, 1856.2808, 1871.1288, 1880.7852, 1893.9622, 1906.3418, 1920.6548, 1932.9302, + 
1945.8584, 1955.473, 1968.8248, 1980.6446, 1995.9598, 2008.349, 2019.8556, 2033.0334, + 2044.0206, 2059.3956, 2069.9174, 2082.6084, 2093.7036, 2106.6108, 2118.9124, 2132.301, + 2144.7628, 2159.8422, 2171.0212, 2183.101, 2193.5112, 2208.052, 2221.3194, 2233.3282, + 2247.295, 2257.7222, 2273.342, 2286.5638, 2299.6786, 2310.8114, 2322.3312, 2335.516, + 2349.874, 2363.5968, 2373.865, 2387.1918, 2401.8328, 2414.8496, 2424.544, 2436.7592, + 2447.1682, 2464.1958, 2474.3438, 2489.0006, 2497.4526, 2513.6586, 2527.19, 2540.7028, + 2553.768, }, + // precision 10 + { 738.1256, 750.4234, 763.1064, 775.4732, 788.4636, 801.0644, 814.488, 827.9654, 841.0832, + 854.7864, 868.1992, 882.2176, 896.5228, 910.1716, 924.7752, 938.899, 953.6126, 968.6492, + 982.9474, 998.5214, 1013.1064, 1028.6364, 1044.2468, 1059.4588, 1075.3832, 1091.0584, + 1106.8606, 1123.3868, 1139.5062, 1156.1862, 1172.463, 1189.339, 1206.1936, 1223.1292, + 1240.1854, 1257.2908, 1275.3324, 1292.8518, 1310.5204, 1328.4854, 1345.9318, 1364.552, + 1381.4658, 1400.4256, 1419.849, 1438.152, 1456.8956, 1474.8792, 1494.118, 1513.62, + 1532.5132, 1551.9322, 1570.7726, 1590.6086, 1610.5332, 1630.5918, 1650.4294, 1669.7662, + 1690.4106, 1710.7338, 1730.9012, 1750.4486, 1770.1556, 1791.6338, 1812.7312, 1833.6264, + 1853.9526, 1874.8742, 1896.8326, 1918.1966, 1939.5594, 1961.07, 1983.037, 2003.1804, + 2026.071, 2047.4884, 2070.0848, 2091.2944, 2114.333, 2135.9626, 2158.2902, 2181.0814, + 2202.0334, 2224.4832, 2246.39, 2269.7202, 2292.1714, 2314.2358, 2338.9346, 2360.891, + 2384.0264, 2408.3834, 2430.1544, 2454.8684, 2476.9896, 2501.4368, 2522.8702, 2548.0408, + 2570.6738, 2593.5208, 2617.0158, 2640.2302, 2664.0962, 2687.4986, 2714.2588, 2735.3914, + 2759.6244, 2781.8378, 2808.0072, 2830.6516, 2856.2454, 2877.2136, 2903.4546, 2926.785, + 2951.2294, 2976.468, 3000.867, 3023.6508, 3049.91, 3073.5984, 3098.162, 3121.5564, + 3146.2328, 3170.9484, 3195.5902, 3221.3346, 3242.7032, 3271.6112, 3296.5546, 3317.7376, + 3345.072, 3369.9518, 3394.326, 3418.1818, 3444.6926, 3469.086, 3494.2754, 3517.8698, + 3544.248, 3565.3768, 3588.7234, 3616.979, 3643.7504, 3668.6812, 3695.72, 3719.7392, + 3742.6224, 3770.4456, 3795.6602, 3819.9058, 3844.002, 3869.517, 3895.6824, 3920.8622, + 3947.1364, 3973.985, 3995.4772, 4021.62, 4046.628, 4074.65, 4096.2256, 4121.831, + 4146.6406, 4173.276, 4195.0744, 4223.9696, 4251.3708, 4272.9966, 4300.8046, 4326.302, + 4353.1248, 4374.312, 4403.0322, 4426.819, 4450.0598, 4478.5206, 4504.8116, 4528.8928, + 4553.9584, 4578.8712, 4603.8384, 4632.3872, 4655.5128, 4675.821, 4704.6222, 4731.9862, + 4755.4174, 4781.2628, 4804.332, 4832.3048, 4862.8752, 4883.4148, 4906.9544, 4935.3516, + 4954.3532, 4984.0248, 5011.217, 5035.3258, 5057.3672, 5084.1828, }, + // precision 11 + { 1477, 1501.6014, 1526.5802, 1551.7942, 1577.3042, 1603.2062, 1629.8402, 1656.2292, + 1682.9462, 1709.9926, 1737.3026, 1765.4252, 1793.0578, 1821.6092, 1849.626, 1878.5568, + 1908.527, 1937.5154, 1967.1874, 1997.3878, 2027.37, 2058.1972, 2089.5728, 2120.1012, + 2151.9668, 2183.292, 2216.0772, 2247.8578, 2280.6562, 2313.041, 2345.714, 2380.3112, + 2414.1806, 2447.9854, 2481.656, 2516.346, 2551.5154, 2586.8378, 2621.7448, 2656.6722, + 2693.5722, 2729.1462, 2765.4124, 2802.8728, 2838.898, 2876.408, 2913.4926, 2951.4938, + 2989.6776, 3026.282, 3065.7704, 3104.1012, 3143.7388, 3181.6876, 3221.1872, 3261.5048, + 3300.0214, 3339.806, 3381.409, 3421.4144, 3461.4294, 3502.2286, 3544.651, 3586.6156, + 3627.337, 3670.083, 3711.1538, 3753.5094, 3797.01, 3838.6686, 3882.1678, 3922.8116, + 
3967.9978, 4009.9204, 4054.3286, 4097.5706, 4140.6014, 4185.544, 4229.5976, 4274.583, + 4316.9438, 4361.672, 4406.2786, 4451.8628, 4496.1834, 4543.505, 4589.1816, 4632.5188, + 4678.2294, 4724.8908, 4769.0194, 4817.052, 4861.4588, 4910.1596, 4956.4344, 5002.5238, + 5048.13, 5093.6374, 5142.8162, 5187.7894, 5237.3984, 5285.6078, 5331.0858, 5379.1036, + 5428.6258, 5474.6018, 5522.7618, 5571.5822, 5618.59, 5667.9992, 5714.88, 5763.454, + 5808.6982, 5860.3644, 5910.2914, 5953.571, 6005.9232, 6055.1914, 6104.5882, 6154.5702, + 6199.7036, 6251.1764, 6298.7596, 6350.0302, 6398.061, 6448.4694, 6495.933, 6548.0474, + 6597.7166, 6646.9416, 6695.9208, 6742.6328, 6793.5276, 6842.1934, 6894.2372, 6945.3864, + 6996.9228, 7044.2372, 7094.1374, 7142.2272, 7192.2942, 7238.8338, 7288.9006, 7344.0908, + 7394.8544, 7443.5176, 7490.4148, 7542.9314, 7595.6738, 7641.9878, 7694.3688, 7743.0448, + 7797.522, 7845.53, 7899.594, 7950.3132, 7996.455, 8050.9442, 8092.9114, 8153.1374, + 8197.4472, 8252.8278, 8301.8728, 8348.6776, 8401.4698, 8453.551, 8504.6598, 8553.8944, + 8604.1276, 8657.6514, 8710.3062, 8758.908, 8807.8706, 8862.1702, 8910.4668, 8960.77, + 9007.2766, 9063.164, 9121.0534, 9164.1354, 9218.1594, 9267.767, 9319.0594, 9372.155, + 9419.7126, 9474.3722, 9520.1338, 9572.368, 9622.7702, 9675.8448, 9726.5396, 9778.7378, + 9827.6554, 9878.1922, 9928.7782, 9978.3984, 10026.578, 10076.5626, 10137.1618, + 10177.5244, 10229.9176, }, + // precision 12 + { 2954, 3003.4782, 3053.3568, 3104.3666, 3155.324, 3206.9598, 3259.648, 3312.539, 3366.1474, + 3420.2576, 3474.8376, 3530.6076, 3586.451, 3643.38, 3700.4104, 3757.5638, 3815.9676, + 3875.193, 3934.838, 3994.8548, 4055.018, 4117.1742, 4178.4482, 4241.1294, 4304.4776, + 4367.4044, 4431.8724, 4496.3732, 4561.4304, 4627.5326, 4693.949, 4761.5532, 4828.7256, + 4897.6182, 4965.5186, 5034.4528, 5104.865, 5174.7164, 5244.6828, 5316.6708, 5387.8312, + 5459.9036, 5532.476, 5604.8652, 5679.6718, 5753.757, 5830.2072, 5905.2828, 5980.0434, + 6056.6264, 6134.3192, 6211.5746, 6290.0816, 6367.1176, 6447.9796, 6526.5576, 6606.1858, + 6686.9144, 6766.1142, 6847.0818, 6927.9664, 7010.9096, 7091.0816, 7175.3962, 7260.3454, + 7344.018, 7426.4214, 7511.3106, 7596.0686, 7679.8094, 7765.818, 7852.4248, 7936.834, + 8022.363, 8109.5066, 8200.4554, 8288.5832, 8373.366, 8463.4808, 8549.7682, 8642.0522, + 8728.3288, 8820.9528, 8907.727, 9001.0794, 9091.2522, 9179.988, 9269.852, 9362.6394, + 9453.642, 9546.9024, 9640.6616, 9732.6622, 9824.3254, 9917.7484, 10007.9392, 10106.7508, + 10196.2152, 10289.8114, 10383.5494, 10482.3064, 10576.8734, 10668.7872, 10764.7156, + 10862.0196, 10952.793, 11049.9748, 11146.0702, 11241.4492, 11339.2772, 11434.2336, + 11530.741, 11627.6136, 11726.311, 11821.5964, 11918.837, 12015.3724, 12113.0162, + 12213.0424, 12306.9804, 12408.4518, 12504.8968, 12604.586, 12700.9332, 12798.705, + 12898.5142, 12997.0488, 13094.788, 13198.475, 13292.7764, 13392.9698, 13486.8574, + 13590.1616, 13686.5838, 13783.6264, 13887.2638, 13992.0978, 14081.0844, 14189.9956, + 14280.0912, 14382.4956, 14486.4384, 14588.1082, 14686.2392, 14782.276, 14888.0284, + 14985.1864, 15088.8596, 15187.0998, 15285.027, 15383.6694, 15495.8266, 15591.3736, + 15694.2008, 15790.3246, 15898.4116, 15997.4522, 16095.5014, 16198.8514, 16291.7492, + 16402.6424, 16499.1266, 16606.2436, 16697.7186, 16796.3946, 16902.3376, 17005.7672, + 17100.814, 17206.8282, 17305.8262, 17416.0744, 17508.4092, 17617.0178, 17715.4554, + 17816.758, 17920.1748, 18012.9236, 18119.7984, 18223.2248, 18324.2482, 18426.6276, + 18525.0932, 
18629.8976, 18733.2588, 18831.0466, 18940.1366, 19032.2696, 19131.729, + 19243.4864, 19349.6932, 19442.866, 19547.9448, 19653.2798, 19754.4034, 19854.0692, + 19965.1224, 20065.1774, 20158.2212, 20253.353, 20366.3264, 20463.22, }, + // precision 13 + { 5908.5052, 6007.2672, 6107.347, 6208.5794, 6311.2622, 6414.5514, 6519.3376, 6625.6952, + 6732.5988, 6841.3552, 6950.5972, 7061.3082, 7173.5646, 7287.109, 7401.8216, 7516.4344, + 7633.3802, 7751.2962, 7870.3784, 7990.292, 8110.79, 8233.4574, 8356.6036, 8482.2712, + 8607.7708, 8735.099, 8863.1858, 8993.4746, 9123.8496, 9255.6794, 9388.5448, 9522.7516, + 9657.3106, 9792.6094, 9930.5642, 10068.794, 10206.7256, 10347.81, 10490.3196, 10632.0778, + 10775.9916, 10920.4662, 11066.124, 11213.073, 11358.0362, 11508.1006, 11659.1716, + 11808.7514, 11959.4884, 12112.1314, 12265.037, 12420.3756, 12578.933, 12734.311, + 12890.0006, 13047.2144, 13207.3096, 13368.5144, 13528.024, 13689.847, 13852.7528, + 14018.3168, 14180.5372, 14346.9668, 14513.5074, 14677.867, 14846.2186, 15017.4186, + 15184.9716, 15356.339, 15529.2972, 15697.3578, 15871.8686, 16042.187, 16216.4094, + 16389.4188, 16565.9126, 16742.3272, 16919.0042, 17094.7592, 17273.965, 17451.8342, + 17634.4254, 17810.5984, 17988.9242, 18171.051, 18354.7938, 18539.466, 18721.0408, + 18904.9972, 19081.867, 19271.9118, 19451.8694, 19637.9816, 19821.2922, 20013.1292, + 20199.3858, 20387.8726, 20572.9514, 20770.7764, 20955.1714, 21144.751, 21329.9952, + 21520.709, 21712.7016, 21906.3868, 22096.2626, 22286.0524, 22475.051, 22665.5098, + 22862.8492, 23055.5294, 23249.6138, 23437.848, 23636.273, 23826.093, 24020.3296, + 24213.3896, 24411.7392, 24602.9614, 24805.7952, 24998.1552, 25193.9588, 25389.0166, + 25585.8392, 25780.6976, 25981.2728, 26175.977, 26376.5252, 26570.1964, 26773.387, + 26962.9812, 27163.0586, 27368.164, 27565.0534, 27758.7428, 27961.1276, 28163.2324, + 28362.3816, 28565.7668, 28758.644, 28956.9768, 29163.4722, 29354.7026, 29561.1186, + 29767.9948, 29959.9986, 30164.0492, 30366.9818, 30562.5338, 30762.9928, 30976.1592, + 31166.274, 31376.722, 31570.3734, 31770.809, 31974.8934, 32179.5286, 32387.5442, + 32582.3504, 32794.076, 32989.9528, 33191.842, 33392.4684, 33595.659, 33801.8672, + 34000.3414, 34200.0922, 34402.6792, 34610.0638, 34804.0084, 35011.13, 35218.669, + 35418.6634, 35619.0792, 35830.6534, 36028.4966, 36229.7902, 36438.6422, 36630.7764, + 36833.3102, 37048.6728, 37247.3916, 37453.5904, 37669.3614, 37854.5526, 38059.305, + 38268.0936, 38470.2516, 38674.7064, 38876.167, 39068.3794, 39281.9144, 39492.8566, + 39684.8628, 39898.4108, 40093.1836, 40297.6858, 40489.7086, 40717.2424, }, + // precision 14 + { 11817.475, 12015.0046, 12215.3792, 12417.7504, 12623.1814, 12830.0086, 13040.0072, + 13252.503, 13466.178, 13683.2738, 13902.0344, 14123.9798, 14347.394, 14573.7784, + 14802.6894, 15033.6824, 15266.9134, 15502.8624, 15741.4944, 15980.7956, 16223.8916, + 16468.6316, 16715.733, 16965.5726, 17217.204, 17470.666, 17727.8516, 17986.7886, + 18247.6902, 18510.9632, 18775.304, 19044.7486, 19314.4408, 19587.202, 19862.2576, + 20135.924, 20417.0324, 20697.9788, 20979.6112, 21265.0274, 21550.723, 21841.6906, + 22132.162, 22428.1406, 22722.127, 23020.5606, 23319.7394, 23620.4014, 23925.2728, + 24226.9224, 24535.581, 24845.505, 25155.9618, 25470.3828, 25785.9702, 26103.7764, + 26420.4132, 26742.0186, 27062.8852, 27388.415, 27714.6024, 28042.296, 28365.4494, + 28701.1526, 29031.8008, 29364.2156, 29704.497, 30037.1458, 30380.111, 30723.8168, + 31059.5114, 31404.9498, 31751.6752, 32095.2686, 32444.7792, 
32794.767, 33145.204, + 33498.4226, 33847.6502, 34209.006, 34560.849, 34919.4838, 35274.9778, 35635.1322, + 35996.3266, 36359.1394, 36722.8266, 37082.8516, 37447.7354, 37815.9606, 38191.0692, + 38559.4106, 38924.8112, 39294.6726, 39663.973, 40042.261, 40416.2036, 40779.2036, + 41161.6436, 41540.9014, 41921.1998, 42294.7698, 42678.5264, 43061.3464, 43432.375, + 43818.432, 44198.6598, 44583.0138, 44970.4794, 45353.924, 45729.858, 46118.2224, + 46511.5724, 46900.7386, 47280.6964, 47668.1472, 48055.6796, 48446.9436, 48838.7146, + 49217.7296, 49613.7796, 50010.7508, 50410.0208, 50793.7886, 51190.2456, 51583.1882, + 51971.0796, 52376.5338, 52763.319, 53165.5534, 53556.5594, 53948.2702, 54346.352, + 54748.7914, 55138.577, 55543.4824, 55941.1748, 56333.7746, 56745.1552, 57142.7944, + 57545.2236, 57935.9956, 58348.5268, 58737.5474, 59158.5962, 59542.6896, 59958.8004, + 60349.3788, 60755.0212, 61147.6144, 61548.194, 61946.0696, 62348.6042, 62763.603, + 63162.781, 63560.635, 63974.3482, 64366.4908, 64771.5876, 65176.7346, 65597.3916, + 65995.915, 66394.0384, 66822.9396, 67203.6336, 67612.2032, 68019.0078, 68420.0388, + 68821.22, 69235.8388, 69640.0724, 70055.155, 70466.357, 70863.4266, 71276.2482, + 71677.0306, 72080.2006, 72493.0214, 72893.5952, 73314.5856, 73714.9852, 74125.3022, + 74521.2122, 74933.6814, 75341.5904, 75743.0244, 76166.0278, 76572.1322, 76973.1028, + 77381.6284, 77800.6092, 78189.328, 78607.0962, 79012.2508, 79407.8358, 79825.725, + 80238.701, 80646.891, 81035.6436, 81460.0448, 81876.3884, }, + // precision 15 + { 23635.0036, 24030.8034, 24431.4744, 24837.1524, 25246.7928, 25661.326, 26081.3532, + 26505.2806, 26933.9892, 27367.7098, 27805.318, 28248.799, 28696.4382, 29148.8244, + 29605.5138, 30066.8668, 30534.2344, 31006.32, 31480.778, 31962.2418, 32447.3324, + 32938.0232, 33432.731, 33930.728, 34433.9896, 34944.1402, 35457.5588, 35974.5958, + 36497.3296, 37021.9096, 37554.326, 38088.0826, 38628.8816, 39171.3192, 39723.2326, + 40274.5554, 40832.3142, 41390.613, 41959.5908, 42532.5466, 43102.0344, 43683.5072, + 44266.694, 44851.2822, 45440.7862, 46038.0586, 46640.3164, 47241.064, 47846.155, + 48454.7396, 49076.9168, 49692.542, 50317.4778, 50939.65, 51572.5596, 52210.2906, + 52843.7396, 53481.3996, 54127.236, 54770.406, 55422.6598, 56078.7958, 56736.7174, + 57397.6784, 58064.5784, 58730.308, 59404.9784, 60077.0864, 60751.9158, 61444.1386, + 62115.817, 62808.7742, 63501.4774, 64187.5454, 64883.6622, 65582.7468, 66274.5318, + 66976.9276, 67688.7764, 68402.138, 69109.6274, 69822.9706, 70543.6108, 71265.5202, + 71983.3848, 72708.4656, 73433.384, 74158.4664, 74896.4868, 75620.9564, 76362.1434, + 77098.3204, 77835.7662, 78582.6114, 79323.9902, 80067.8658, 80814.9246, 81567.0136, + 82310.8536, 83061.9952, 83821.4096, 84580.8608, 85335.547, 86092.5802, 86851.6506, + 87612.311, 88381.2016, 89146.3296, 89907.8974, 90676.846, 91451.4152, 92224.5518, + 92995.8686, 93763.5066, 94551.2796, 95315.1944, 96096.1806, 96881.0918, 97665.679, + 98442.68, 99229.3002, 100011.0994, 100790.6386, 101580.1564, 102377.7484, 103152.1392, + 103944.2712, 104730.216, 105528.6336, 106324.9398, 107117.6706, 107890.3988, 108695.2266, + 109485.238, 110294.7876, 111075.0958, 111878.0496, 112695.2864, 113464.5486, 114270.0474, + 115068.608, 115884.3626, 116673.2588, 117483.3716, 118275.097, 119085.4092, 119879.2808, + 120687.5868, 121499.9944, 122284.916, 123095.9254, 123912.5038, 124709.0454, 125503.7182, + 126323.259, 127138.9412, 127943.8294, 128755.646, 129556.5354, 130375.3298, 131161.4734, + 131971.1962, 
132787.5458, 133588.1056, 134431.351, 135220.2906, 136023.398, 136846.6558, + 137667.0004, 138463.663, 139283.7154, 140074.6146, 140901.3072, 141721.8548, 142543.2322, + 143356.1096, 144173.7412, 144973.0948, 145794.3162, 146609.5714, 147420.003, 148237.9784, + 149050.5696, 149854.761, 150663.1966, 151494.0754, 152313.1416, 153112.6902, 153935.7206, + 154746.9262, 155559.547, 156401.9746, 157228.7036, 158008.7254, 158820.75, 159646.9184, + 160470.4458, 161279.5348, 162093.3114, 162918.542, 163729.2842, }, + // precision 16 + { 47271, 48062.3584, 48862.7074, 49673.152, 50492.8416, 51322.9514, 52161.03, 53009.407, + 53867.6348, 54734.206, 55610.5144, 56496.2096, 57390.795, 58297.268, 59210.6448, + 60134.665, 61068.0248, 62010.4472, 62962.5204, 63923.5742, 64895.0194, 65876.4182, + 66862.6136, 67862.6968, 68868.8908, 69882.8544, 70911.271, 71944.0924, 72990.0326, + 74040.692, 75100.6336, 76174.7826, 77252.5998, 78340.2974, 79438.2572, 80545.4976, + 81657.2796, 82784.6336, 83915.515, 85059.7362, 86205.9368, 87364.4424, 88530.3358, + 89707.3744, 90885.9638, 92080.197, 93275.5738, 94479.391, 95695.918, 96919.2236, + 98148.4602, 99382.3474, 100625.6974, 101878.0284, 103141.6278, 104409.4588, 105686.2882, + 106967.5402, 108261.6032, 109548.1578, 110852.0728, 112162.231, 113479.0072, 114806.2626, + 116137.9072, 117469.5048, 118813.5186, 120165.4876, 121516.2556, 122875.766, 124250.5444, + 125621.2222, 127003.2352, 128387.848, 129775.2644, 131181.7776, 132577.3086, 133979.9458, + 135394.1132, 136800.9078, 138233.217, 139668.5308, 141085.212, 142535.2122, 143969.0684, + 145420.2872, 146878.1542, 148332.7572, 149800.3202, 151269.66, 152743.6104, 154213.0948, + 155690.288, 157169.4246, 158672.1756, 160160.059, 161650.6854, 163145.7772, 164645.6726, + 166159.1952, 167682.1578, 169177.3328, 170700.0118, 172228.8964, 173732.6664, + 175265.5556, 176787.799, 178317.111, 179856.6914, 181400.865, 182943.4612, 184486.742, + 186033.4698, 187583.7886, 189148.1868, 190688.4526, 192250.1926, 193810.9042, + 195354.2972, 196938.7682, 198493.5898, 200079.2824, 201618.912, 203205.5492, 204765.5798, + 206356.1124, 207929.3064, 209498.7196, 211086.229, 212675.1324, 214256.7892, 215826.2392, + 217412.8474, 218995.6724, 220618.6038, 222207.1166, 223781.0364, 225387.4332, + 227005.7928, 228590.4336, 230217.8738, 231805.1054, 233408.9, 234995.3432, 236601.4956, + 238190.7904, 239817.2548, 241411.2832, 243002.4066, 244640.1884, 246255.3128, + 247849.3508, 249479.9734, 251106.8822, 252705.027, 254332.9242, 255935.129, 257526.9014, + 259154.772, 260777.625, 262390.253, 264004.4906, 265643.59, 267255.4076, 268873.426, + 270470.7252, 272106.4804, 273722.4456, 275337.794, 276945.7038, 278592.9154, 280204.3726, + 281841.1606, 283489.171, 285130.1716, 286735.3362, 288364.7164, 289961.1814, 291595.5524, + 293285.683, 294899.6668, 296499.3434, 298128.0462, 299761.8946, 301394.2424, 302997.6748, + 304615.1478, 306269.7724, 307886.114, 309543.1028, 311153.2862, 312782.8546, 314421.2008, + 316033.2438, 317692.9636, 319305.2648, 320948.7406, 322566.3364, 324228.4224, + 325847.1542, }, + // precision 17 + { 94542, 96125.811, 97728.019, 99348.558, 100987.9705, 102646.7565, 104324.5125, 106021.7435, + 107736.7865, 109469.272, 111223.9465, 112995.219, 114787.432, 116593.152, 118422.71, + 120267.2345, 122134.6765, 124020.937, 125927.2705, 127851.255, 129788.9485, 131751.016, + 133726.8225, 135722.592, 137736.789, 139770.568, 141821.518, 143891.343, 145982.1415, + 148095.387, 150207.526, 152355.649, 154515.6415, 156696.05, 158887.7575, 
161098.159, + 163329.852, 165569.053, 167837.4005, 170121.6165, 172420.4595, 174732.6265, 177062.77, + 179412.502, 181774.035, 184151.939, 186551.6895, 188965.691, 191402.8095, 193857.949, + 196305.0775, 198774.6715, 201271.2585, 203764.78, 206299.3695, 208818.1365, 211373.115, + 213946.7465, 216532.076, 219105.541, 221714.5375, 224337.5135, 226977.5125, 229613.0655, + 232270.2685, 234952.2065, 237645.3555, 240331.1925, 243034.517, 245756.0725, 248517.6865, + 251232.737, 254011.3955, 256785.995, 259556.44, 262368.335, 265156.911, 267965.266, + 270785.583, 273616.0495, 276487.4835, 279346.639, 282202.509, 285074.3885, 287942.2855, + 290856.018, 293774.0345, 296678.5145, 299603.6355, 302552.6575, 305492.9785, 308466.8605, + 311392.581, 314347.538, 317319.4295, 320285.9785, 323301.7325, 326298.3235, 329301.3105, + 332301.987, 335309.791, 338370.762, 341382.923, 344431.1265, 347464.1545, 350507.28, + 353619.2345, 356631.2005, 359685.203, 362776.7845, 365886.488, 368958.2255, 372060.6825, + 375165.4335, 378237.935, 381328.311, 384430.5225, 387576.425, 390683.242, 393839.648, + 396977.8425, 400101.9805, 403271.296, 406409.8425, 409529.5485, 412678.7, 415847.423, + 419020.8035, 422157.081, 425337.749, 428479.6165, 431700.902, 434893.1915, 438049.582, + 441210.5415, 444379.2545, 447577.356, 450741.931, 453959.548, 457137.0935, 460329.846, + 463537.4815, 466732.3345, 469960.5615, 473164.681, 476347.6345, 479496.173, 482813.1645, + 486025.6995, 489249.4885, 492460.1945, 495675.8805, 498908.0075, 502131.802, 505374.3855, + 508550.9915, 511806.7305, 515026.776, 518217.0005, 521523.9855, 524705.9855, 527950.997, + 531210.0265, 534472.497, 537750.7315, 540926.922, 544207.094, 547429.4345, 550666.3745, + 553975.3475, 557150.7185, 560399.6165, 563662.697, 566916.7395, 570146.1215, 573447.425, + 576689.6245, 579874.5745, 583202.337, 586503.0255, 589715.635, 592910.161, 596214.3885, + 599488.035, 602740.92, 605983.0685, 609248.67, 612491.3605, 615787.912, 619107.5245, + 622307.9555, 625577.333, 628840.4385, 632085.2155, 635317.6135, 638691.7195, 641887.467, + 645139.9405, 648441.546, 651666.252, 654941.845, }, + // precision 18 + { 189084, 192250.913, 195456.774, 198696.946, 201977.762, 205294.444, 208651.754, 212042.099, + 215472.269, 218941.91, 222443.912, 225996.845, 229568.199, 233193.568, 236844.457, + 240543.233, 244279.475, 248044.27, 251854.588, 255693.2, 259583.619, 263494.621, + 267445.385, 271454.061, 275468.769, 279549.456, 283646.446, 287788.198, 291966.099, + 296181.164, 300431.469, 304718.618, 309024.004, 313393.508, 317760.803, 322209.731, + 326675.061, 331160.627, 335654.47, 340241.442, 344841.833, 349467.132, 354130.629, + 358819.432, 363574.626, 368296.587, 373118.482, 377914.93, 382782.301, 387680.669, + 392601.981, 397544.323, 402529.115, 407546.018, 412593.658, 417638.657, 422762.865, + 427886.169, 433017.167, 438213.273, 443441.254, 448692.421, 453937.533, 459239.049, + 464529.569, 469910.083, 475274.03, 480684.473, 486070.26, 491515.237, 496995.651, + 502476.617, 507973.609, 513497.19, 519083.233, 524726.509, 530305.505, 535945.728, + 541584.404, 547274.055, 552967.236, 558667.862, 564360.216, 570128.148, 575965.08, + 581701.952, 587532.523, 593361.144, 599246.128, 605033.418, 610958.779, 616837.117, + 622772.818, 628672.04, 634675.369, 640574.831, 646585.739, 652574.547, 658611.217, + 664642.684, 670713.914, 676737.681, 682797.313, 688837.897, 694917.874, 701009.882, + 707173.648, 713257.254, 719415.392, 725636.761, 731710.697, 737906.209, 744103.074, + 750313.39, 756504.185, 
762712.579, 768876.985, 775167.859, 781359, 787615.959, + 793863.597, 800245.477, 806464.582, 812785.294, 819005.925, 825403.057, 831676.197, + 837936.284, 844266.968, 850642.711, 856959.756, 863322.774, 869699.931, 876102.478, + 882355.787, 888694.463, 895159.952, 901536.143, 907872.631, 914293.672, 920615.14, + 927130.974, 933409.404, 939922.178, 946331.47, 952745.93, 959209.264, 965590.224, + 972077.284, 978501.961, 984953.19, 991413.271, 997817.479, 1004222.658, 1010725.676, + 1017177.138, 1023612.529, 1030098.236, 1036493.719, 1043112.207, 1049537.036, + 1056008.096, 1062476.184, 1068942.337, 1075524.95, 1081932.864, 1088426.025, 1094776.005, + 1101327.448, 1107901.673, 1114423.639, 1120884.602, 1127324.923, 1133794.24, 1140328.886, + 1146849.376, 1153346.682, 1159836.502, 1166478.703, 1172953.304, 1179391.502, + 1185950.982, 1192544.052, 1198913.41, 1205430.994, 1212015.525, 1218674.042, 1225121.683, + 1231551.101, 1238126.379, 1244673.795, 1251260.649, 1257697.86, 1264320.983, 1270736.319, + 1277274.694, 1283804.95, 1290211.514, 1296858.568, 1303455.691, } }; + + public static final double[][] biasData = { + // precision 4 + { 10, 9.717, 9.207, 8.7896, 8.2882, 7.8204, 7.3772, 6.9342, 6.5202, 6.161, 5.7722, 5.4636, + 5.0396, 4.6766, 4.3566, 4.0454, 3.7936, 3.4856, 3.2666, 2.9946, 2.766, 2.4692, 2.3638, + 2.0764, 1.7864, 1.7602, 1.4814, 1.433, 1.2926, 1.0664, 0.999600000000001, 0.7956, 0.5366, + 0.589399999999998, 0.573799999999999, 0.269799999999996, 0.368200000000002, + 0.0544000000000011, 0.234200000000001, 0.0108000000000033, -0.203400000000002, + -0.0701999999999998, -0.129600000000003, -0.364199999999997, -0.480600000000003, + -0.226999999999997, -0.322800000000001, -0.382599999999996, -0.511200000000002, + -0.669600000000003, -0.749400000000001, -0.500399999999999, -0.617600000000003, -0.6922, + -0.601599999999998, -0.416200000000003, -0.338200000000001, -0.782600000000002, + -0.648600000000002, -0.919800000000002, -0.851799999999997, -0.962400000000002, -0.6402, + -1.1922, -1.0256, -1.086, -1.21899999999999, -0.819400000000002, -0.940600000000003, + -1.1554, -1.2072, -1.1752, -1.16759999999999, -1.14019999999999, -1.3754, + -1.29859999999999, -1.607, -1.3292, -1.7606, }, + // precision 5 + { 22, 21.1194, 20.8208, 20.2318, 19.77, 19.2436, 18.7774, 18.2848, 17.8224, 17.3742, 16.9336, + 16.503, 16.0494, 15.6292, 15.2124, 14.798, 14.367, 13.9728, 13.5944, 13.217, 12.8438, + 12.3696, 12.0956, 11.7044, 11.324, 11.0668, 10.6698, 10.3644, 10.049, 9.6918, 9.4146, + 9.082, 8.687, 8.5398, 8.2462, 7.857, 7.6606, 7.4168, 7.1248, 6.9222, 6.6804, 6.447, + 6.3454, 5.9594, 5.7636, 5.5776, 5.331, 5.19, 4.9676, 4.7564, 4.5314, 4.4442, 4.3708, + 3.9774, 3.9624, 3.8796, 3.755, 3.472, 3.2076, 3.1024, 2.8908, 2.7338, 2.7728, 2.629, + 2.413, 2.3266, 2.1524, 2.2642, 2.1806, 2.0566, 1.9192, 1.7598, 1.3516, 1.5802, + 1.43859999999999, 1.49160000000001, 1.1524, 1.1892, 0.841399999999993, 0.879800000000003, + 0.837599999999995, 0.469800000000006, 0.765600000000006, 0.331000000000003, + 0.591399999999993, 0.601200000000006, 0.701599999999999, 0.558199999999999, + 0.339399999999998, 0.354399999999998, 0.491200000000006, 0.308000000000007, + 0.355199999999996, -0.0254000000000048, 0.205200000000005, -0.272999999999996, + 0.132199999999997, 0.394400000000005, -0.241200000000006, 0.242000000000004, + 0.191400000000002, 0.253799999999998, -0.122399999999999, -0.370800000000003, + 0.193200000000004, -0.0848000000000013, 0.0867999999999967, -0.327200000000005, + -0.285600000000002, 0.311400000000006, 
-0.128399999999999, -0.754999999999995, + -0.209199999999996, -0.293599999999998, -0.364000000000004, -0.253600000000006, + -0.821200000000005, -0.253600000000006, -0.510400000000004, -0.383399999999995, + -0.491799999999998, -0.220200000000006, -0.0972000000000008, -0.557400000000001, + -0.114599999999996, -0.295000000000002, -0.534800000000004, 0.346399999999988, + -0.65379999999999, 0.0398000000000138, 0.0341999999999985, -0.995800000000003, + -0.523400000000009, -0.489000000000004, -0.274799999999999, -0.574999999999989, + -0.482799999999997, 0.0571999999999946, -0.330600000000004, -0.628800000000012, + -0.140199999999993, -0.540600000000012, -0.445999999999998, -0.599400000000003, + -0.262599999999992, 0.163399999999996, -0.100599999999986, -0.39500000000001, + -1.06960000000001, -0.836399999999998, -0.753199999999993, -0.412399999999991, + -0.790400000000005, -0.29679999999999, -0.28540000000001, -0.193000000000012, + -0.0772000000000048, -0.962799999999987, -0.414800000000014, }, + // precision 6 + { 45, 44.1902, 43.271, 42.8358, 41.8142, 41.2854, 40.317, 39.354, 38.8924, 37.9436, 37.4596, + 36.5262, 35.6248, 35.1574, 34.2822, 33.837, 32.9636, 32.074, 31.7042, 30.7976, 30.4772, + 29.6564, 28.7942, 28.5004, 27.686, 27.291, 26.5672, 25.8556, 25.4982, 24.8204, 24.4252, + 23.7744, 23.0786, 22.8344, 22.0294, 21.8098, 21.0794, 20.5732, 20.1878, 19.5648, 19.2902, + 18.6784, 18.3352, 17.8946, 17.3712, 17.0852, 16.499, 16.2686, 15.6844, 15.2234, 14.9732, + 14.3356, 14.2286, 13.7262, 13.3284, 13.1048, 12.5962, 12.3562, 12.1272, 11.4184, 11.4974, + 11.0822, 10.856, 10.48, 10.2834, 10.0208, 9.637, 9.51739999999999, 9.05759999999999, + 8.74760000000001, 8.42700000000001, 8.1326, 8.2372, 8.2788, 7.6776, 7.79259999999999, + 7.1952, 6.9564, 6.6454, 6.87, 6.5428, 6.19999999999999, 6.02940000000001, + 5.62780000000001, 5.6782, 5.792, 5.35159999999999, 5.28319999999999, 5.0394, + 5.07480000000001, 4.49119999999999, 4.84899999999999, 4.696, 4.54040000000001, + 4.07300000000001, 4.37139999999999, 3.7216, 3.7328, 3.42080000000001, 3.41839999999999, + 3.94239999999999, 3.27719999999999, 3.411, 3.13079999999999, 2.76900000000001, + 2.92580000000001, 2.68279999999999, 2.75020000000001, 2.70599999999999, 2.3886, + 3.01859999999999, 2.45179999999999, 2.92699999999999, 2.41720000000001, 2.41139999999999, + 2.03299999999999, 2.51240000000001, 2.5564, 2.60079999999999, 2.41720000000001, + 1.80439999999999, 1.99700000000001, 2.45480000000001, 1.8948, 2.2346, 2.30860000000001, + 2.15479999999999, 1.88419999999999, 1.6508, 0.677199999999999, 1.72540000000001, 1.4752, + 1.72280000000001, 1.66139999999999, 1.16759999999999, 1.79300000000001, 1.00059999999999, + 0.905200000000008, 0.659999999999997, 1.55879999999999, 1.1636, 0.688199999999995, + 0.712600000000009, 0.450199999999995, 1.1978, 0.975599999999986, 0.165400000000005, + 1.727, 1.19739999999999, -0.252600000000001, 1.13460000000001, 1.3048, 1.19479999999999, + 0.313400000000001, 0.878999999999991, 1.12039999999999, 0.853000000000009, + 1.67920000000001, 0.856999999999999, 0.448599999999999, 1.2362, 0.953399999999988, + 1.02859999999998, 0.563199999999995, 0.663000000000011, 0.723000000000013, + 0.756599999999992, 0.256599999999992, -0.837600000000009, 0.620000000000005, + 0.821599999999989, 0.216600000000028, 0.205600000000004, 0.220199999999977, + 0.372599999999977, 0.334400000000016, 0.928400000000011, 0.972800000000007, + 0.192400000000021, 0.487199999999973, -0.413000000000011, 0.807000000000016, + 0.120600000000024, 0.769000000000005, 0.870799999999974, 
0.66500000000002, + 0.118200000000002, 0.401200000000017, 0.635199999999998, 0.135400000000004, + 0.175599999999974, 1.16059999999999, 0.34620000000001, 0.521400000000028, + -0.586599999999976, -1.16480000000001, 0.968399999999974, 0.836999999999989, + 0.779600000000016, 0.985799999999983, }, + // precision 7 + { 91, 89.4934, 87.9758, 86.4574, 84.9718, 83.4954, 81.5302, 80.0756, 78.6374, 77.1782, + 75.7888, 73.9522, 72.592, 71.2532, 69.9086, 68.5938, 66.9474, 65.6796, 64.4394, 63.2176, + 61.9768, 60.4214, 59.2528, 58.0102, 56.8658, 55.7278, 54.3044, 53.1316, 52.093, 51.0032, + 49.9092, 48.6306, 47.5294, 46.5756, 45.6508, 44.662, 43.552, 42.3724, 41.617, 40.5754, + 39.7872, 38.8444, 37.7988, 36.8606, 36.2118, 35.3566, 34.4476, 33.5882, 32.6816, 32.0824, + 31.0258, 30.6048, 29.4436, 28.7274, 27.957, 27.147, 26.4364, 25.7592, 25.3386, 24.781, + 23.8028, 23.656, 22.6544, 21.996, 21.4718, 21.1544, 20.6098, 19.5956, 19.0616, 18.5758, + 18.4878, 17.5244, 17.2146, 16.724, 15.8722, 15.5198, 15.0414, 14.941, 14.9048, 13.87, + 13.4304, 13.028, 12.4708, 12.37, 12.0624, 11.4668, 11.5532, 11.4352, 11.2564, 10.2744, + 10.2118, 9.74720000000002, 10.1456, 9.2928, 8.75040000000001, 8.55279999999999, + 8.97899999999998, 8.21019999999999, 8.18340000000001, 7.3494, 7.32499999999999, + 7.66140000000001, 6.90300000000002, 7.25439999999998, 6.9042, 7.21499999999997, + 6.28640000000001, 6.08139999999997, 6.6764, 6.30099999999999, 5.13900000000001, + 5.65800000000002, 5.17320000000001, 4.59019999999998, 4.9538, 5.08280000000002, + 4.92200000000003, 4.99020000000002, 4.7328, 5.4538, 4.11360000000002, 4.22340000000003, + 4.08780000000002, 3.70800000000003, 4.15559999999999, 4.18520000000001, 3.63720000000001, + 3.68220000000002, 3.77960000000002, 3.6078, 2.49160000000001, 3.13099999999997, 2.5376, + 3.19880000000001, 3.21100000000001, 2.4502, 3.52820000000003, 2.91199999999998, + 3.04480000000001, 2.7432, 2.85239999999999, 2.79880000000003, 2.78579999999999, + 1.88679999999999, 2.98860000000002, 2.50639999999999, 1.91239999999999, 2.66160000000002, + 2.46820000000002, 1.58199999999999, 1.30399999999997, 2.27379999999999, 2.68939999999998, + 1.32900000000001, 3.10599999999999, 1.69080000000002, 2.13740000000001, 2.53219999999999, + 1.88479999999998, 1.33240000000001, 1.45119999999997, 1.17899999999997, 2.44119999999998, + 1.60659999999996, 2.16700000000003, 0.77940000000001, 2.37900000000002, 2.06700000000001, + 1.46000000000004, 2.91160000000002, 1.69200000000001, 0.954600000000028, + 2.49300000000005, 2.2722, 1.33500000000004, 2.44899999999996, 1.20140000000004, + 3.07380000000001, 2.09739999999999, 2.85640000000001, 2.29960000000005, 2.40899999999999, + 1.97040000000004, 0.809799999999996, 1.65279999999996, 2.59979999999996, + 0.95799999999997, 2.06799999999998, 2.32780000000002, 4.20159999999998, 1.96320000000003, + 1.86400000000003, 1.42999999999995, 3.77940000000001, 1.27200000000005, 1.86440000000005, + 2.20600000000002, 3.21900000000005, 1.5154, 2.61019999999996, }, + // precision 8 + { 183.2152, 180.2454, 177.2096, 173.6652, 170.6312, 167.6822, 164.249, 161.3296, 158.0038, + 155.2074, 152.4612, 149.27, 146.5178, 143.4412, 140.8032, 138.1634, 135.1688, 132.6074, + 129.6946, 127.2664, 124.8228, 122.0432, 119.6824, 116.9464, 114.6268, 112.2626, 109.8376, + 107.4034, 104.8956, 102.8522, 100.7638, 98.3552, 96.3556, 93.7526, 91.9292, 89.8954, + 87.8198, 85.7668, 83.298, 81.6688, 79.9466, 77.9746, 76.1672, 74.3474, 72.3028, 70.8912, + 69.114, 67.4646, 65.9744, 64.4092, 62.6022, 60.843, 59.5684, 58.1652, 56.5426, 
55.4152, + 53.5388, 52.3592, 51.1366, 49.486, 48.3918, 46.5076, 45.509, 44.3834, 43.3498, 42.0668, + 40.7346, 40.1228, 38.4528, 37.7, 36.644, 36.0518, 34.5774, 33.9068, 32.432, 32.1666, + 30.434, 29.6644, 28.4894, 27.6312, 26.3804, 26.292, 25.5496000000001, 25.0234, 24.8206, + 22.6146, 22.4188, 22.117, 20.6762, 20.6576, 19.7864, 19.509, 18.5334, 17.9204, 17.772, + 16.2924, 16.8654, 15.1836, 15.745, 15.1316, 15.0386, 14.0136, 13.6342, 12.6196, 12.1866, + 12.4281999999999, 11.3324, 10.4794000000001, 11.5038, 10.129, 9.52800000000002, + 10.3203999999999, 9.46299999999997, 9.79280000000006, 9.12300000000005, 8.74180000000001, + 9.2192, 7.51020000000005, 7.60659999999996, 7.01840000000004, 7.22239999999999, + 7.40139999999997, 6.76179999999999, 7.14359999999999, 5.65060000000005, 5.63779999999997, + 5.76599999999996, 6.75139999999999, 5.57759999999996, 3.73220000000003, 5.8048, + 5.63019999999995, 4.93359999999996, 3.47979999999995, 4.33879999999999, 3.98940000000005, + 3.81960000000004, 3.31359999999995, 3.23080000000004, 3.4588, 3.08159999999998, 3.4076, + 3.00639999999999, 2.38779999999997, 2.61900000000003, 1.99800000000005, 3.34820000000002, + 2.95060000000001, 0.990999999999985, 2.11440000000005, 2.20299999999997, + 2.82219999999995, 2.73239999999998, 2.7826, 3.76660000000004, 2.26480000000004, + 2.31280000000004, 2.40819999999997, 2.75360000000001, 3.33759999999995, 2.71559999999999, + 1.7478000000001, 1.42920000000004, 2.39300000000003, 2.22779999999989, 2.34339999999997, + 0.87259999999992, 3.88400000000001, 1.80600000000004, 1.91759999999999, 1.16779999999994, + 1.50320000000011, 2.52500000000009, 0.226400000000012, 2.31500000000005, + 0.930000000000064, 1.25199999999995, 2.14959999999996, 0.0407999999999902, + 2.5447999999999, 1.32960000000003, 0.197400000000016, 2.52620000000002, 3.33279999999991, + -1.34300000000007, 0.422199999999975, 0.917200000000093, 1.12920000000008, + 1.46060000000011, 1.45779999999991, 2.8728000000001, 3.33359999999993, -1.34079999999994, + 1.57680000000005, 0.363000000000056, 1.40740000000005, 0.656600000000026, + 0.801400000000058, -0.454600000000028, 1.51919999999996, }, + // precision 9 + { 368, 361.8294, 355.2452, 348.6698, 342.1464, 336.2024, 329.8782, 323.6598, 317.462, + 311.2826, 305.7102, 299.7416, 293.9366, 288.1046, 282.285, 277.0668, 271.306, 265.8448, + 260.301, 254.9886, 250.2422, 244.8138, 239.7074, 234.7428, 229.8402, 225.1664, 220.3534, + 215.594, 210.6886, 205.7876, 201.65, 197.228, 192.8036, 188.1666, 184.0818, 180.0824, + 176.2574, 172.302, 168.1644, 164.0056, 160.3802, 156.7192, 152.5234, 149.2084, 145.831, + 142.485, 139.1112, 135.4764, 131.76, 129.3368, 126.5538, 122.5058, 119.2646, 116.5902, + 113.3818, 110.8998, 107.9532, 105.2062, 102.2798, 99.4728, 96.9582, 94.3292, 92.171, + 89.7809999999999, 87.5716, 84.7048, 82.5322, 79.875, 78.3972, 75.3464, 73.7274, 71.2834, + 70.1444, 68.4263999999999, 66.0166, 64.018, 62.0437999999999, 60.3399999999999, 58.6856, + 57.9836, 55.0311999999999, 54.6769999999999, 52.3188, 51.4846, 49.4423999999999, 47.739, + 46.1487999999999, 44.9202, 43.4059999999999, 42.5342000000001, 41.2834, 38.8954000000001, + 38.3286000000001, 36.2146, 36.6684, 35.9946, 33.123, 33.4338, 31.7378000000001, 29.076, + 28.9692, 27.4964, 27.0998, 25.9864, 26.7754, 24.3208, 23.4838, 22.7388000000001, + 24.0758000000001, 21.9097999999999, 20.9728, 19.9228000000001, 19.9292, 16.617, 17.05, + 18.2996000000001, 15.6128000000001, 15.7392, 14.5174, 13.6322, 12.2583999999999, + 13.3766000000001, 11.423, 13.1232, 9.51639999999998, 
10.5938000000001, 9.59719999999993, + 8.12220000000002, 9.76739999999995, 7.50440000000003, 7.56999999999994, 6.70440000000008, + 6.41419999999994, 6.71019999999999, 5.60940000000005, 4.65219999999999, 6.84099999999989, + 3.4072000000001, 3.97859999999991, 3.32760000000007, 5.52160000000003, 3.31860000000006, + 2.06940000000009, 4.35400000000004, 1.57500000000005, 0.280799999999999, + 2.12879999999996, -0.214799999999968, -0.0378000000000611, -0.658200000000079, + 0.654800000000023, -0.0697999999999865, 0.858400000000074, -2.52700000000004, + -2.1751999999999, -3.35539999999992, -1.04019999999991, -0.651000000000067, + -2.14439999999991, -1.96659999999997, -3.97939999999994, -0.604400000000169, + -3.08260000000018, -3.39159999999993, -5.29640000000018, -5.38920000000007, + -5.08759999999984, -4.69900000000007, -5.23720000000003, -3.15779999999995, + -4.97879999999986, -4.89899999999989, -7.48880000000008, -5.94799999999987, + -5.68060000000014, -6.67180000000008, -4.70499999999993, -7.27779999999984, + -4.6579999999999, -4.4362000000001, -4.32139999999981, -5.18859999999995, + -6.66879999999992, -6.48399999999992, -5.1260000000002, -4.4032000000002, + -6.13500000000022, -5.80819999999994, -4.16719999999987, -4.15039999999999, + -7.45600000000013, -7.24080000000004, -9.83179999999993, -5.80420000000004, + -8.6561999999999, -6.99940000000015, -10.5473999999999, -7.34139999999979, + -6.80999999999995, -6.29719999999998, -6.23199999999997, }, + // precision 10 + { 737.1256, 724.4234, 711.1064, 698.4732, 685.4636, 673.0644, 660.488, 647.9654, 636.0832, + 623.7864, 612.1992, 600.2176, 588.5228, 577.1716, 565.7752, 554.899, 543.6126, 532.6492, + 521.9474, 511.5214, 501.1064, 490.6364, 480.2468, 470.4588, 460.3832, 451.0584, 440.8606, + 431.3868, 422.5062, 413.1862, 404.463, 395.339, 386.1936, 378.1292, 369.1854, 361.2908, + 353.3324, 344.8518, 337.5204, 329.4854, 321.9318, 314.552, 306.4658, 299.4256, 292.849, + 286.152, 278.8956, 271.8792, 265.118, 258.62, 252.5132, 245.9322, 239.7726, 233.6086, + 227.5332, 222.5918, 216.4294, 210.7662, 205.4106, 199.7338, 194.9012, 188.4486, 183.1556, + 178.6338, 173.7312, 169.6264, 163.9526, 159.8742, 155.8326, 151.1966, 147.5594, 143.07, + 140.037, 134.1804, 131.071, 127.4884, 124.0848, 120.2944, 117.333, 112.9626, 110.2902, + 107.0814, 103.0334, 99.4832000000001, 96.3899999999999, 93.7202000000002, + 90.1714000000002, 87.2357999999999, 85.9346, 82.8910000000001, 80.0264000000002, + 78.3834000000002, 75.1543999999999, 73.8683999999998, 70.9895999999999, 69.4367999999999, + 64.8701999999998, 65.0408000000002, 61.6738, 59.5207999999998, 57.0158000000001, 54.2302, + 53.0962, 50.4985999999999, 52.2588000000001, 47.3914, 45.6244000000002, 42.8377999999998, + 43.0072, 40.6516000000001, 40.2453999999998, 35.2136, 36.4546, 33.7849999999999, + 33.2294000000002, 32.4679999999998, 30.8670000000002, 28.6507999999999, 28.9099999999999, + 27.5983999999999, 26.1619999999998, 24.5563999999999, 23.2328000000002, 21.9484000000002, + 21.5902000000001, 21.3346000000001, 17.7031999999999, 20.6111999999998, 19.5545999999999, + 15.7375999999999, 17.0720000000001, 16.9517999999998, 15.326, 13.1817999999998, + 14.6925999999999, 13.0859999999998, 13.2754, 10.8697999999999, 11.248, 7.3768, + 4.72339999999986, 7.97899999999981, 8.7503999999999, 7.68119999999999, 9.7199999999998, + 7.73919999999998, 5.6224000000002, 7.44560000000001, 6.6601999999998, 5.9058, + 4.00199999999995, 4.51699999999983, 4.68240000000014, 3.86220000000003, 5.13639999999987, + 5.98500000000013, 2.47719999999981, 
2.61999999999989, 1.62800000000016, 4.65000000000009, + 0.225599999999758, 0.831000000000131, -0.359400000000278, 1.27599999999984, + -2.92559999999958, -0.0303999999996449, 2.37079999999969, -2.0033999999996, + 0.804600000000391, 0.30199999999968, 1.1247999999996, -2.6880000000001, + 0.0321999999996478, -1.18099999999959, -3.9402, -1.47940000000017, -0.188400000000001, + -2.10720000000038, -2.04159999999956, -3.12880000000041, -4.16160000000036, + -0.612799999999879, -3.48719999999958, -8.17900000000009, -5.37780000000021, + -4.01379999999972, -5.58259999999973, -5.73719999999958, -7.66799999999967, + -5.69520000000011, -1.1247999999996, -5.58520000000044, -8.04560000000038, + -4.64840000000004, -11.6468000000004, -7.97519999999986, -5.78300000000036, + -7.67420000000038, -10.6328000000003, -9.81720000000041, }, + // precision 11 + { 1476, 1449.6014, 1423.5802, 1397.7942, 1372.3042, 1347.2062, 1321.8402, 1297.2292, + 1272.9462, 1248.9926, 1225.3026, 1201.4252, 1178.0578, 1155.6092, 1132.626, 1110.5568, + 1088.527, 1066.5154, 1045.1874, 1024.3878, 1003.37, 982.1972, 962.5728, 942.1012, + 922.9668, 903.292, 884.0772, 864.8578, 846.6562, 828.041, 809.714, 792.3112, 775.1806, + 757.9854, 740.656, 724.346, 707.5154, 691.8378, 675.7448, 659.6722, 645.5722, 630.1462, + 614.4124, 600.8728, 585.898, 572.408, 558.4926, 544.4938, 531.6776, 517.282, 505.7704, + 493.1012, 480.7388, 467.6876, 456.1872, 445.5048, 433.0214, 420.806, 411.409, 400.4144, + 389.4294, 379.2286, 369.651, 360.6156, 350.337, 342.083, 332.1538, 322.5094, 315.01, + 305.6686, 298.1678, 287.8116, 280.9978, 271.9204, 265.3286, 257.5706, 249.6014, 242.544, + 235.5976, 229.583, 220.9438, 214.672, 208.2786, 201.8628, 195.1834, 191.505, 186.1816, + 178.5188, 172.2294, 167.8908, 161.0194, 158.052, 151.4588, 148.1596, 143.4344, 138.5238, + 133.13, 127.6374, 124.8162, 118.7894, 117.3984, 114.6078, 109.0858, 105.1036, 103.6258, + 98.6018000000004, 95.7618000000002, 93.5821999999998, 88.5900000000001, 86.9992000000002, + 82.8800000000001, 80.4539999999997, 74.6981999999998, 74.3644000000004, 73.2914000000001, + 65.5709999999999, 66.9232000000002, 65.1913999999997, 62.5882000000001, 61.5702000000001, + 55.7035999999998, 56.1764000000003, 52.7596000000003, 53.0302000000001, 49.0609999999997, + 48.4694, 44.933, 46.0474000000004, 44.7165999999997, 41.9416000000001, 39.9207999999999, + 35.6328000000003, 35.5276000000003, 33.1934000000001, 33.2371999999996, 33.3864000000003, + 33.9228000000003, 30.2371999999996, 29.1373999999996, 25.2272000000003, 24.2942000000003, + 19.8338000000003, 18.9005999999999, 23.0907999999999, 21.8544000000002, 19.5176000000001, + 15.4147999999996, 16.9314000000004, 18.6737999999996, 12.9877999999999, 14.3688000000002, + 12.0447999999997, 15.5219999999999, 12.5299999999997, 14.5940000000001, 14.3131999999996, + 9.45499999999993, 12.9441999999999, 3.91139999999996, 13.1373999999996, 5.44720000000052, + 9.82779999999912, 7.87279999999919, 3.67760000000089, 5.46980000000076, 5.55099999999948, + 5.65979999999945, 3.89439999999922, 3.1275999999998, 5.65140000000065, 6.3062000000009, + 3.90799999999945, 1.87060000000019, 5.17020000000048, 2.46680000000015, + 0.770000000000437, -3.72340000000077, 1.16400000000067, 8.05340000000069, + 0.135399999999208, 2.15940000000046, 0.766999999999825, 1.0594000000001, + 3.15500000000065, -0.287399999999252, 2.37219999999979, -2.86620000000039, + -1.63199999999961, -2.22979999999916, -0.15519999999924, -1.46039999999994, + -0.262199999999211, -2.34460000000036, -2.8078000000005, 
-3.22179999999935, + -5.60159999999996, -8.42200000000048, -9.43740000000071, 0.161799999999857, + -10.4755999999998, -10.0823999999993, }, + // precision 12 + { 2953, 2900.4782, 2848.3568, 2796.3666, 2745.324, 2694.9598, 2644.648, 2595.539, 2546.1474, + 2498.2576, 2450.8376, 2403.6076, 2357.451, 2311.38, 2266.4104, 2221.5638, 2176.9676, + 2134.193, 2090.838, 2048.8548, 2007.018, 1966.1742, 1925.4482, 1885.1294, 1846.4776, + 1807.4044, 1768.8724, 1731.3732, 1693.4304, 1657.5326, 1621.949, 1586.5532, 1551.7256, + 1517.6182, 1483.5186, 1450.4528, 1417.865, 1385.7164, 1352.6828, 1322.6708, 1291.8312, + 1260.9036, 1231.476, 1201.8652, 1173.6718, 1145.757, 1119.2072, 1092.2828, 1065.0434, + 1038.6264, 1014.3192, 988.5746, 965.0816, 940.1176, 917.9796, 894.5576, 871.1858, + 849.9144, 827.1142, 805.0818, 783.9664, 763.9096, 742.0816, 724.3962, 706.3454, 688.018, + 667.4214, 650.3106, 633.0686, 613.8094, 597.818, 581.4248, 563.834, 547.363, 531.5066, + 520.455400000001, 505.583199999999, 488.366, 476.480799999999, 459.7682, 450.0522, + 434.328799999999, 423.952799999999, 408.727000000001, 399.079400000001, 387.252200000001, + 373.987999999999, 360.852000000001, 351.6394, 339.642, 330.902400000001, + 322.661599999999, 311.662200000001, 301.3254, 291.7484, 279.939200000001, 276.7508, + 263.215200000001, 254.811400000001, 245.5494, 242.306399999999, 234.8734, + 223.787200000001, 217.7156, 212.0196, 200.793, 195.9748, 189.0702, 182.449199999999, + 177.2772, 170.2336, 164.741, 158.613600000001, 155.311, 147.5964, 142.837, 137.3724, + 132.0162, 130.0424, 121.9804, 120.451800000001, 114.8968, 111.585999999999, + 105.933199999999, 101.705, 98.5141999999996, 95.0488000000005, 89.7880000000005, + 91.4750000000004, 83.7764000000006, 80.9698000000008, 72.8574000000008, 73.1615999999995, + 67.5838000000003, 62.6263999999992, 63.2638000000006, 66.0977999999996, 52.0843999999997, + 58.9956000000002, 47.0912000000008, 46.4956000000002, 48.4383999999991, 47.1082000000006, + 43.2392, 37.2759999999998, 40.0283999999992, 35.1864000000005, 35.8595999999998, 32.0998, + 28.027, 23.6694000000007, 33.8266000000003, 26.3736000000008, 27.2008000000005, + 21.3245999999999, 26.4115999999995, 23.4521999999997, 19.5013999999992, 19.8513999999996, + 10.7492000000002, 18.6424000000006, 13.1265999999996, 18.2436000000016, 6.71860000000015, + 3.39459999999963, 6.33759999999893, 7.76719999999841, 0.813999999998487, + 3.82819999999992, 0.826199999999517, 8.07440000000133, -1.59080000000176, + 5.01780000000144, 0.455399999998917, -0.24199999999837, 0.174800000000687, + -9.07640000000174, -4.20160000000033, -3.77520000000004, -4.75179999999818, + -5.3724000000002, -8.90680000000066, -6.10239999999976, -5.74120000000039, + -9.95339999999851, -3.86339999999836, -13.7304000000004, -16.2710000000006, + -7.51359999999841, -3.30679999999847, -13.1339999999982, -10.0551999999989, + -6.72019999999975, -8.59660000000076, -10.9307999999983, -1.8775999999998, + -4.82259999999951, -13.7788, -21.6470000000008, -10.6735999999983, -15.7799999999988, }, + // precision 13 + { 5907.5052, 5802.2672, 5697.347, 5593.5794, 5491.2622, 5390.5514, 5290.3376, 5191.6952, + 5093.5988, 4997.3552, 4902.5972, 4808.3082, 4715.5646, 4624.109, 4533.8216, 4444.4344, + 4356.3802, 4269.2962, 4183.3784, 4098.292, 4014.79, 3932.4574, 3850.6036, 3771.2712, + 3691.7708, 3615.099, 3538.1858, 3463.4746, 3388.8496, 3315.6794, 3244.5448, 3173.7516, + 3103.3106, 3033.6094, 2966.5642, 2900.794, 2833.7256, 2769.81, 2707.3196, 2644.0778, + 2583.9916, 2523.4662, 2464.124, 2406.073, 
2347.0362, 2292.1006, 2238.1716, 2182.7514, + 2128.4884, 2077.1314, 2025.037, 1975.3756, 1928.933, 1879.311, 1831.0006, 1783.2144, + 1738.3096, 1694.5144, 1649.024, 1606.847, 1564.7528, 1525.3168, 1482.5372, 1443.9668, + 1406.5074, 1365.867, 1329.2186, 1295.4186, 1257.9716, 1225.339, 1193.2972, 1156.3578, + 1125.8686, 1091.187, 1061.4094, 1029.4188, 1000.9126, 972.3272, 944.004199999999, + 915.7592, 889.965, 862.834200000001, 840.4254, 812.598399999999, 785.924200000001, + 763.050999999999, 741.793799999999, 721.466, 699.040799999999, 677.997200000002, + 649.866999999998, 634.911800000002, 609.8694, 591.981599999999, 570.2922, + 557.129199999999, 538.3858, 521.872599999999, 502.951400000002, 495.776399999999, + 475.171399999999, 459.751, 439.995200000001, 426.708999999999, 413.7016, 402.3868, + 387.262599999998, 372.0524, 357.050999999999, 342.5098, 334.849200000001, + 322.529399999999, 311.613799999999, 295.848000000002, 289.273000000001, 274.093000000001, + 263.329600000001, 251.389599999999, 245.7392, 231.9614, 229.7952, 217.155200000001, + 208.9588, 199.016599999999, 190.839199999999, 180.6976, 176.272799999999, + 166.976999999999, 162.5252, 151.196400000001, 149.386999999999, 133.981199999998, + 130.0586, 130.164000000001, 122.053400000001, 110.7428, 108.1276, 106.232400000001, + 100.381600000001, 98.7668000000012, 86.6440000000002, 79.9768000000004, 82.4722000000002, + 68.7026000000005, 70.1186000000016, 71.9948000000004, 58.998599999999, 59.0492000000013, + 56.9818000000014, 47.5338000000011, 42.9928, 51.1591999999982, 37.2740000000013, + 42.7220000000016, 31.3734000000004, 26.8090000000011, 25.8934000000008, 26.5286000000015, + 29.5442000000003, 19.3503999999994, 26.0760000000009, 17.9527999999991, 14.8419999999969, + 10.4683999999979, 8.65899999999965, 9.86720000000059, 4.34139999999752, + -0.907800000000861, -3.32080000000133, -0.936199999996461, -11.9916000000012, + -8.87000000000262, -6.33099999999831, -11.3366000000024, -15.9207999999999, + -9.34659999999712, -15.5034000000014, -19.2097999999969, -15.357799999998, + -28.2235999999975, -30.6898000000001, -19.3271999999997, -25.6083999999973, + -24.409599999999, -13.6385999999984, -33.4473999999973, -32.6949999999997, + -28.9063999999998, -31.7483999999968, -32.2935999999972, -35.8329999999987, + -47.620600000002, -39.0855999999985, -33.1434000000008, -46.1371999999974, + -37.5892000000022, -46.8164000000033, -47.3142000000007, -60.2914000000019, + -37.7575999999972, }, + // precision 14 + { 11816.475, 11605.0046, 11395.3792, 11188.7504, 10984.1814, 10782.0086, 10582.0072, + 10384.503, 10189.178, 9996.2738, 9806.0344, 9617.9798, 9431.394, 9248.7784, 9067.6894, + 8889.6824, 8712.9134, 8538.8624, 8368.4944, 8197.7956, 8031.8916, 7866.6316, 7703.733, + 7544.5726, 7386.204, 7230.666, 7077.8516, 6926.7886, 6778.6902, 6631.9632, 6487.304, + 6346.7486, 6206.4408, 6070.202, 5935.2576, 5799.924, 5671.0324, 5541.9788, 5414.6112, + 5290.0274, 5166.723, 5047.6906, 4929.162, 4815.1406, 4699.127, 4588.5606, 4477.7394, + 4369.4014, 4264.2728, 4155.9224, 4055.581, 3955.505, 3856.9618, 3761.3828, 3666.9702, + 3575.7764, 3482.4132, 3395.0186, 3305.8852, 3221.415, 3138.6024, 3056.296, 2970.4494, + 2896.1526, 2816.8008, 2740.2156, 2670.497, 2594.1458, 2527.111, 2460.8168, 2387.5114, + 2322.9498, 2260.6752, 2194.2686, 2133.7792, 2074.767, 2015.204, 1959.4226, 1898.6502, + 1850.006, 1792.849, 1741.4838, 1687.9778, 1638.1322, 1589.3266, 1543.1394, 1496.8266, + 1447.8516, 1402.7354, 1361.9606, 1327.0692, 1285.4106, 1241.8112, 1201.6726, 1161.973, + 
1130.261, 1094.2036, 1048.2036, 1020.6436, 990.901400000002, 961.199800000002, + 924.769800000002, 899.526400000002, 872.346400000002, 834.375, 810.432000000001, + 780.659800000001, 756.013800000001, 733.479399999997, 707.923999999999, 673.858, + 652.222399999999, 636.572399999997, 615.738599999997, 586.696400000001, 564.147199999999, + 541.679600000003, 523.943599999999, 505.714599999999, 475.729599999999, 461.779600000002, + 449.750800000002, 439.020799999998, 412.7886, 400.245600000002, 383.188199999997, + 362.079599999997, 357.533799999997, 334.319000000003, 327.553399999997, 308.559399999998, + 291.270199999999, 279.351999999999, 271.791400000002, 252.576999999997, 247.482400000001, + 236.174800000001, 218.774599999997, 220.155200000001, 208.794399999999, 201.223599999998, + 182.995600000002, 185.5268, 164.547400000003, 176.5962, 150.689599999998, 157.8004, + 138.378799999999, 134.021200000003, 117.614399999999, 108.194000000003, 97.0696000000025, + 89.6042000000016, 95.6030000000028, 84.7810000000027, 72.635000000002, 77.3482000000004, + 59.4907999999996, 55.5875999999989, 50.7346000000034, 61.3916000000027, 50.9149999999936, + 39.0384000000049, 58.9395999999979, 29.633600000001, 28.2032000000036, 26.0078000000067, + 17.0387999999948, 9.22000000000116, 13.8387999999977, 8.07240000000456, 14.1549999999988, + 15.3570000000036, 3.42660000000615, 6.24820000000182, -2.96940000000177, + -8.79940000000352, -5.97860000000219, -14.4048000000039, -3.4143999999942, + -13.0148000000045, -11.6977999999945, -25.7878000000055, -22.3185999999987, + -24.409599999999, -31.9756000000052, -18.9722000000038, -22.8678000000073, + -30.8972000000067, -32.3715999999986, -22.3907999999938, -43.6720000000059, -35.9038, + -39.7492000000057, -54.1641999999993, -45.2749999999942, -42.2989999999991, + -44.1089999999967, -64.3564000000042, -49.9551999999967, -42.6116000000038, }, + // precision 15 + { 23634.0036, 23210.8034, 22792.4744, 22379.1524, 21969.7928, 21565.326, 21165.3532, + 20770.2806, 20379.9892, 19994.7098, 19613.318, 19236.799, 18865.4382, 18498.8244, + 18136.5138, 17778.8668, 17426.2344, 17079.32, 16734.778, 16397.2418, 16063.3324, + 15734.0232, 15409.731, 15088.728, 14772.9896, 14464.1402, 14157.5588, 13855.5958, + 13559.3296, 13264.9096, 12978.326, 12692.0826, 12413.8816, 12137.3192, 11870.2326, + 11602.5554, 11340.3142, 11079.613, 10829.5908, 10583.5466, 10334.0344, 10095.5072, + 9859.694, 9625.2822, 9395.7862, 9174.0586, 8957.3164, 8738.064, 8524.155, 8313.7396, + 8116.9168, 7913.542, 7718.4778, 7521.65, 7335.5596, 7154.2906, 6968.7396, 6786.3996, + 6613.236, 6437.406, 6270.6598, 6107.7958, 5945.7174, 5787.6784, 5635.5784, 5482.308, + 5337.9784, 5190.0864, 5045.9158, 4919.1386, 4771.817, 4645.7742, 4518.4774, 4385.5454, + 4262.6622, 4142.74679999999, 4015.5318, 3897.9276, 3790.7764, 3685.13800000001, + 3573.6274, 3467.9706, 3368.61079999999, 3271.5202, 3170.3848, 3076.4656, + 2982.38400000001, 2888.4664, 2806.4868, 2711.9564, 2634.1434, 2551.3204, 2469.7662, + 2396.61139999999, 2318.9902, 2243.8658, 2171.9246, 2105.01360000001, 2028.8536, + 1960.9952, 1901.4096, 1841.86079999999, 1777.54700000001, 1714.5802, 1654.65059999999, + 1596.311, 1546.2016, 1492.3296, 1433.8974, 1383.84600000001, 1339.4152, 1293.5518, + 1245.8686, 1193.50659999999, 1162.27959999999, 1107.19439999999, 1069.18060000001, + 1035.09179999999, 999.679000000004, 957.679999999993, 925.300199999998, 888.099400000006, + 848.638600000006, 818.156400000007, 796.748399999997, 752.139200000005, 725.271200000003, + 692.216, 
671.633600000001, 647.939799999993, 621.670599999998, 575.398799999995, + 561.226599999995, 532.237999999998, 521.787599999996, 483.095799999996, 467.049599999998, + 465.286399999997, 415.548599999995, 401.047399999996, 380.607999999993, 377.362599999993, + 347.258799999996, 338.371599999999, 310.096999999994, 301.409199999995, 276.280799999993, + 265.586800000005, 258.994399999996, 223.915999999997, 215.925399999993, 213.503800000006, + 191.045400000003, 166.718200000003, 166.259000000005, 162.941200000001, 148.829400000002, + 141.645999999993, 123.535399999993, 122.329800000007, 89.473399999988, 80.1962000000058, + 77.5457999999926, 59.1056000000099, 83.3509999999951, 52.2906000000075, 36.3979999999865, + 40.6558000000077, 42.0003999999899, 19.6630000000005, 19.7153999999864, + -8.38539999999921, -0.692799999989802, 0.854800000000978, 3.23219999999856, + -3.89040000000386, -5.25880000001052, -24.9052000000083, -22.6837999999989, + -26.4286000000138, -34.997000000003, -37.0216000000073, -43.430400000012, + -58.2390000000014, -68.8034000000043, -56.9245999999985, -57.8583999999973, + -77.3097999999882, -73.2793999999994, -81.0738000000129, -87.4530000000086, + -65.0254000000132, -57.296399999992, -96.2746000000043, -103.25, -96.081600000005, + -91.5542000000132, -102.465200000006, -107.688599999994, -101.458000000013, + -109.715800000005, }, + // precision 16 + { 47270, 46423.3584, 45585.7074, 44757.152, 43938.8416, 43130.9514, 42330.03, 41540.407, + 40759.6348, 39988.206, 39226.5144, 38473.2096, 37729.795, 36997.268, 36272.6448, + 35558.665, 34853.0248, 34157.4472, 33470.5204, 32793.5742, 32127.0194, 31469.4182, + 30817.6136, 30178.6968, 29546.8908, 28922.8544, 28312.271, 27707.0924, 27114.0326, + 26526.692, 25948.6336, 25383.7826, 24823.5998, 24272.2974, 23732.2572, 23201.4976, + 22674.2796, 22163.6336, 21656.515, 21161.7362, 20669.9368, 20189.4424, 19717.3358, + 19256.3744, 18795.9638, 18352.197, 17908.5738, 17474.391, 17052.918, 16637.2236, + 16228.4602, 15823.3474, 15428.6974, 15043.0284, 14667.6278, 14297.4588, 13935.2882, + 13578.5402, 13234.6032, 12882.1578, 12548.0728, 12219.231, 11898.0072, 11587.2626, + 11279.9072, 10973.5048, 10678.5186, 10392.4876, 10105.2556, 9825.766, 9562.5444, + 9294.2222, 9038.2352, 8784.848, 8533.2644, 8301.7776, 8058.30859999999, 7822.94579999999, + 7599.11319999999, 7366.90779999999, 7161.217, 6957.53080000001, 6736.212, + 6548.21220000001, 6343.06839999999, 6156.28719999999, 5975.15419999999, 5791.75719999999, + 5621.32019999999, 5451.66, 5287.61040000001, 5118.09479999999, 4957.288, 4798.4246, + 4662.17559999999, 4512.05900000001, 4364.68539999999, 4220.77720000001, 4082.67259999999, + 3957.19519999999, 3842.15779999999, 3699.3328, 3583.01180000001, 3473.8964, + 3338.66639999999, 3233.55559999999, 3117.799, 3008.111, 2909.69140000001, + 2814.86499999999, 2719.46119999999, 2624.742, 2532.46979999999, 2444.7886, 2370.1868, + 2272.45259999999, 2196.19260000001, 2117.90419999999, 2023.2972, 1969.76819999999, + 1885.58979999999, 1833.2824, 1733.91200000001, 1682.54920000001, 1604.57980000001, + 1556.11240000001, 1491.3064, 1421.71960000001, 1371.22899999999, 1322.1324, 1264.7892, + 1196.23920000001, 1143.8474, 1088.67240000001, 1073.60380000001, 1023.11660000001, + 959.036400000012, 927.433199999999, 906.792799999996, 853.433599999989, 841.873800000001, + 791.1054, 756.899999999994, 704.343200000003, 672.495599999995, 622.790399999998, + 611.254799999995, 567.283200000005, 519.406599999988, 519.188400000014, 495.312800000014, + 451.350799999986, 
443.973399999988, 431.882199999993, 392.027000000002, 380.924200000009, + 345.128999999986, 298.901400000002, 287.771999999997, 272.625, 247.253000000026, + 222.490600000019, 223.590000000026, 196.407599999977, 176.425999999978, 134.725199999986, + 132.4804, 110.445599999977, 86.7939999999944, 56.7038000000175, 64.915399999998, + 38.3726000000024, 37.1606000000029, 46.170999999973, 49.1716000000015, 15.3362000000197, + 6.71639999997569, -34.8185999999987, -39.4476000000141, 12.6830000000191, + -12.3331999999937, -50.6565999999875, -59.9538000000175, -65.1054000000004, + -70.7576000000117, -106.325200000021, -126.852200000023, -110.227599999984, + -132.885999999999, -113.897200000007, -142.713800000027, -151.145399999979, + -150.799200000009, -177.756200000003, -156.036399999983, -182.735199999996, + -177.259399999981, -198.663600000029, -174.577600000019, -193.84580000001, }, + // precision 17 + { 94541, 92848.811, 91174.019, 89517.558, 87879.9705, 86262.7565, 84663.5125, 83083.7435, + 81521.7865, 79977.272, 78455.9465, 76950.219, 75465.432, 73994.152, 72546.71, 71115.2345, + 69705.6765, 68314.937, 66944.2705, 65591.255, 64252.9485, 62938.016, 61636.8225, + 60355.592, 59092.789, 57850.568, 56624.518, 55417.343, 54231.1415, 53067.387, 51903.526, + 50774.649, 49657.6415, 48561.05, 47475.7575, 46410.159, 45364.852, 44327.053, 43318.4005, + 42325.6165, 41348.4595, 40383.6265, 39436.77, 38509.502, 37594.035, 36695.939, + 35818.6895, 34955.691, 34115.8095, 33293.949, 32465.0775, 31657.6715, 30877.2585, + 30093.78, 29351.3695, 28594.1365, 27872.115, 27168.7465, 26477.076, 25774.541, + 25106.5375, 24452.5135, 23815.5125, 23174.0655, 22555.2685, 21960.2065, 21376.3555, + 20785.1925, 20211.517, 19657.0725, 19141.6865, 18579.737, 18081.3955, 17578.995, + 17073.44, 16608.335, 16119.911, 15651.266, 15194.583, 14749.0495, 14343.4835, 13925.639, + 13504.509, 13099.3885, 12691.2855, 12328.018, 11969.0345, 11596.5145, 11245.6355, + 10917.6575, 10580.9785, 10277.8605, 9926.58100000001, 9605.538, 9300.42950000003, + 8989.97850000003, 8728.73249999998, 8448.3235, 8175.31050000002, 7898.98700000002, + 7629.79100000003, 7413.76199999999, 7149.92300000001, 6921.12650000001, 6677.1545, + 6443.28000000003, 6278.23450000002, 6014.20049999998, 5791.20299999998, 5605.78450000001, + 5438.48800000001, 5234.2255, 5059.6825, 4887.43349999998, 4682.935, 4496.31099999999, + 4322.52250000002, 4191.42499999999, 4021.24200000003, 3900.64799999999, 3762.84250000003, + 3609.98050000001, 3502.29599999997, 3363.84250000003, 3206.54849999998, 3079.70000000001, + 2971.42300000001, 2867.80349999998, 2727.08100000001, 2630.74900000001, 2496.6165, + 2440.902, 2356.19150000002, 2235.58199999999, 2120.54149999999, 2012.25449999998, + 1933.35600000003, 1820.93099999998, 1761.54800000001, 1663.09350000002, 1578.84600000002, + 1509.48149999999, 1427.3345, 1379.56150000001, 1306.68099999998, 1212.63449999999, + 1084.17300000001, 1124.16450000001, 1060.69949999999, 1007.48849999998, 941.194499999983, + 879.880500000028, 836.007500000007, 782.802000000025, 748.385499999975, 647.991500000004, + 626.730500000005, 570.776000000013, 484.000500000024, 513.98550000001, 418.985499999952, + 386.996999999974, 370.026500000036, 355.496999999974, 356.731499999994, 255.92200000002, + 259.094000000041, 205.434499999974, 165.374500000034, 197.347500000033, 95.718499999959, + 67.6165000000037, 54.6970000000438, 31.7395000000251, -15.8784999999916, + 8.42500000004657, -26.3754999999655, -118.425500000012, -66.6629999999423, + -42.9745000000112, 
-107.364999999991, -189.839000000036, -162.611499999999, + -164.964999999967, -189.079999999958, -223.931499999948, -235.329999999958, + -269.639500000048, -249.087999999989, -206.475499999942, -283.04449999996, + -290.667000000016, -304.561499999953, -336.784499999951, -380.386500000022, + -283.280499999993, -364.533000000054, -389.059499999974, -364.454000000027, + -415.748000000021, -417.155000000028, }, + // precision 18 + { 189083, 185696.913, 182348.774, 179035.946, 175762.762, 172526.444, 169329.754, 166166.099, + 163043.269, 159958.91, 156907.912, 153906.845, 150924.199, 147996.568, 145093.457, + 142239.233, 139421.475, 136632.27, 133889.588, 131174.2, 128511.619, 125868.621, + 123265.385, 120721.061, 118181.769, 115709.456, 113252.446, 110840.198, 108465.099, + 106126.164, 103823.469, 101556.618, 99308.004, 97124.508, 94937.803, 92833.731, + 90745.061, 88677.627, 86617.47, 84650.442, 82697.833, 80769.132, 78879.629, 77014.432, + 75215.626, 73384.587, 71652.482, 69895.93, 68209.301, 66553.669, 64921.981, 63310.323, + 61742.115, 60205.018, 58698.658, 57190.657, 55760.865, 54331.169, 52908.167, 51550.273, + 50225.254, 48922.421, 47614.533, 46362.049, 45098.569, 43926.083, 42736.03, 41593.473, + 40425.26, 39316.237, 38243.651, 37170.617, 36114.609, 35084.19, 34117.233, 33206.509, + 32231.505, 31318.728, 30403.404, 29540.0550000001, 28679.236, 27825.862, 26965.216, + 26179.148, 25462.08, 24645.952, 23922.523, 23198.144, 22529.128, 21762.4179999999, + 21134.779, 20459.117, 19840.818, 19187.04, 18636.3689999999, 17982.831, 17439.7389999999, + 16874.547, 16358.2169999999, 15835.684, 15352.914, 14823.681, 14329.313, 13816.897, + 13342.874, 12880.882, 12491.648, 12021.254, 11625.392, 11293.7610000001, 10813.697, + 10456.209, 10099.074, 9755.39000000001, 9393.18500000006, 9047.57900000003, + 8657.98499999999, 8395.85900000005, 8033, 7736.95900000003, 7430.59699999995, + 7258.47699999996, 6924.58200000005, 6691.29399999999, 6357.92500000005, 6202.05700000003, + 5921.19700000004, 5628.28399999999, 5404.96799999999, 5226.71100000001, 4990.75600000005, + 4799.77399999998, 4622.93099999998, 4472.478, 4171.78700000001, 3957.46299999999, + 3868.95200000005, 3691.14300000004, 3474.63100000005, 3341.67200000002, 3109.14000000001, + 3071.97400000005, 2796.40399999998, 2756.17799999996, 2611.46999999997, 2471.93000000005, + 2382.26399999997, 2209.22400000005, 2142.28399999999, 2013.96100000001, 1911.18999999994, + 1818.27099999995, 1668.47900000005, 1519.65800000005, 1469.67599999998, 1367.13800000004, + 1248.52899999998, 1181.23600000003, 1022.71900000004, 1088.20700000005, 959.03600000008, + 876.095999999903, 791.183999999892, 703.337000000058, 731.949999999953, 586.86400000006, + 526.024999999907, 323.004999999888, 320.448000000091, 340.672999999952, 309.638999999966, + 216.601999999955, 102.922999999952, 19.2399999999907, -0.114000000059605, + -32.6240000000689, -89.3179999999702, -153.497999999905, -64.2970000000205, + -143.695999999996, -259.497999999905, -253.017999999924, -213.948000000091, + -397.590000000084, -434.006000000052, -403.475000000093, -297.958000000101, + -404.317000000039, -528.898999999976, -506.621000000043, -513.205000000075, + -479.351000000024, -596.139999999898, -527.016999999993, -664.681000000099, + -680.306000000099, -704.050000000047, -850.486000000034, -757.43200000003, + -713.308999999892, } }; + +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java new 
file mode 100644 index 0000000000..00cb039db1 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLDenseRegister.java @@ -0,0 +1,192 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.ndv.hll; + +import java.util.Arrays; + +public class HLLDenseRegister implements HLLRegister { + + // 2^p number of bytes for register + private byte[] register; + + // max value stored in registered is cached to determine the bit width for + // bit packing + private int maxRegisterValue; + + // keep count of number of zeroes in registers + private int numZeroes; + + // compute and cache inverse power of 2 for register values + private double[] invPow2Register; + + // number of register bits + private int p; + + // m = 2^p + private int m; + + public HLLDenseRegister(int p) { + this(p, true); + } + + public HLLDenseRegister(int p, boolean bitPack) { + this.p = p; + this.m = 1 << p; + this.register = new byte[m]; + this.invPow2Register = new double[m]; + Arrays.fill(invPow2Register, 1.0); + this.maxRegisterValue = 0; + this.numZeroes = m; + if (bitPack == false) { + this.maxRegisterValue = 0xff; + } + } + + public boolean add(long hashcode) { + + // LSB p bits + final int registerIdx = (int) (hashcode & (m - 1)); + + // MSB 64 - p bits + final long w = hashcode >>> p; + + // longest run of trailing zeroes + final int lr = Long.numberOfTrailingZeros(w) + 1; + return set(registerIdx, (byte) lr); + } + + public boolean set(int idx, byte value) { + boolean updated = false; + if (idx < register.length && value > register[idx]) { + + // update max register value + if (value > maxRegisterValue) { + maxRegisterValue = value; + } + + // update number of zeros + if (register[idx] == 0 && value > 0) { + numZeroes--; + } + + // set register value and compute inverse pow of 2 for register value + register[idx] = value; + invPow2Register[idx] = Math.pow(2, -value); + + updated = true; + } + return updated; + } + + public int size() { + return register.length; + } + + public int getNumZeroes() { + return numZeroes; + } + + public void merge(HLLRegister hllRegister) { + if (hllRegister instanceof HLLDenseRegister) { + HLLDenseRegister hdr = (HLLDenseRegister) hllRegister; + byte[] inRegister = hdr.getRegister(); + + // merge only if the register length matches + if (register.length != inRegister.length) { + throw new IllegalArgumentException( + "The size of register sets of HyperLogLogs to be merged does not match."); + } + + // compare register values and store the max register value + for (int i = 0; i < inRegister.length; i++) { + if (inRegister[i] > register[i]) { + if (register[i] == 0) { + numZeroes--; + } + register[i] = inRegister[i]; + invPow2Register[i] = Math.pow(2, -inRegister[i]); + } + } + + // 
update max register value + if (hdr.getMaxRegisterValue() > maxRegisterValue) { + maxRegisterValue = hdr.getMaxRegisterValue(); + } + } else { + throw new IllegalArgumentException("Specified register is not instance of HLLDenseRegister"); + } + } + + public byte[] getRegister() { + return register; + } + + public void setRegister(byte[] register) { + this.register = register; + } + + public int getMaxRegisterValue() { + return maxRegisterValue; + } + + public double getSumInversePow2() { + double sum = 0; + for (double d : invPow2Register) { + sum += d; + } + return sum; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("HLLDenseRegister - "); + sb.append("p: "); + sb.append(p); + sb.append(" numZeroes: "); + sb.append(numZeroes); + sb.append(" maxRegisterValue: "); + sb.append(maxRegisterValue); + return sb.toString(); + } + + public String toExtendedString() { + return toString() + " register: " + Arrays.toString(register); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof HLLDenseRegister)) { + return false; + } + HLLDenseRegister other = (HLLDenseRegister) obj; + return numZeroes == other.numZeroes && maxRegisterValue == other.maxRegisterValue + && Arrays.equals(register, other.register); + } + + @Override + public int hashCode() { + int hashcode = 0; + hashcode += 31 * numZeroes; + hashcode += 31 * maxRegisterValue; + hashcode += Arrays.hashCode(register); + return hashcode; + } + +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java new file mode 100644 index 0000000000..eefc60fbd6 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLRegister.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.ndv.hll; + +public interface HLLRegister { + + /** + * Specify a hashcode to add to hyperloglog register. + * @param hashcode + * - hashcode to add + * @return true if register value is updated else false + */ + public boolean add(long hashcode); + + /** + * Instead of specifying hashcode, this interface can be used to directly + * specify the register index and register value. This interface is useful + * when reconstructing hyperloglog from a serialized representation where its + * not possible to regenerate the hashcode. 
+ * @param idx + * - register index + * @param value + * - register value + * @return true if register value is updated else false + */ + public boolean set(int idx, byte value); + + /** + * Merge hyperloglog registers of the same type (SPARSE or DENSE register) + * @param reg + * - register to be merged + */ + public void merge(HLLRegister reg); +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java new file mode 100644 index 0000000000..05228330bc --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HLLSparseRegister.java @@ -0,0 +1,266 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.ndv.hll; + +import java.util.Map; + +import it.unimi.dsi.fastutil.ints.Int2ByteAVLTreeMap; +import it.unimi.dsi.fastutil.ints.Int2ByteSortedMap; + +public class HLLSparseRegister implements HLLRegister { + + // maintains sorted list of register indices and its corresponding values. + // Its easier to use primitive sorted map as opposed to int[] used in this + // paper + // http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf + private Int2ByteSortedMap sparseMap; + + // for a better insertion performance values are added to temporary unsorted + // list which will be merged to sparse map after a threshold + private int[] tempList; + private int tempListIdx; + + // number of register bits + private final int p; + + // new number of register bits for higher accuracy + private final int pPrime; + + // number of bits to store the number of zero runs + private final int qPrime; + + // masks for quicker extraction of p, pPrime, qPrime values + private final int mask; + private final int pPrimeMask; + private final int qPrimeMask; + + public HLLSparseRegister(int p, int pp, int qp) { + this.p = p; + this.sparseMap = new Int2ByteAVLTreeMap(); + this.tempList = new int[HLLConstants.TEMP_LIST_DEFAULT_SIZE]; + this.tempListIdx = 0; + this.pPrime = pp; + this.qPrime = qp; + this.mask = ((1 << pPrime) - 1) ^ ((1 << p) - 1); + this.pPrimeMask = ((1 << pPrime) - 1); + this.qPrimeMask = (1 << qPrime) - 1; + } + + public boolean add(long hashcode) { + boolean updated = false; + + // fill the temp list before merging to sparse map + if (tempListIdx < tempList.length) { + int encodedHash = encodeHash(hashcode); + tempList[tempListIdx++] = encodedHash; + updated = true; + } else { + updated = mergeTempListToSparseMap(); + } + + return updated; + } + + /** + * Adds temp list to sparse map. The key for sparse map entry is the register + * index determined by pPrime and value is the number of trailing zeroes. 
+ * @return + */ + private boolean mergeTempListToSparseMap() { + boolean updated = false; + for (int i = 0; i < tempListIdx; i++) { + int encodedHash = tempList[i]; + int key = encodedHash & pPrimeMask; + byte value = (byte) (encodedHash >>> pPrime); + byte nr = 0; + // if MSB is set to 1 then next qPrime MSB bits contains the value of + // number of zeroes. + // if MSB is set to 0 then number of zeroes is contained within pPrime - p + // bits. + if (encodedHash < 0) { + nr = (byte) (value & qPrimeMask); + } else { + nr = (byte) (Integer.numberOfTrailingZeros(encodedHash >>> p) + 1); + } + updated = set(key, nr); + } + + // reset temp list index + tempListIdx = 0; + return updated; + } + + /** + *
+   * Input: 64 bit hashcode
+   * 
+   * |---------w-------------| |------------p'----------|
+   * 10101101.......1010101010 10101010101 01010101010101
+   *                                       |------p-----|
+   *                                       
+   * Output: 32 bit int
+   * 
+   * |b| |-q'-|  |------------p'----------|
+   *  1  010101  01010101010 10101010101010
+   *                         |------p-----|
+   *                    
+   * 
+   * The default values of p', q' and b are 25, 6, 1 (total 32 bits) respectively.
+   * This function will return an int encoded in the following format:
+   * 
+   * p  - LSB p bits represent the register index
+   * p' - LSB p' bits are used for increased accuracy in estimation
+   * q' - q' bits after p' are left as-is from the hashcode if b = 0; else the
+   *      q' bits encode the longest trailing zero run from the (w-p) input bits
+   * b  - 0 if the longest trailing zero run is contained within (p'-p) bits
+   *      1 if the longest trailing zero run is computed from the (w-p) input bits and
+   *      its value is stored in the q' bits
+   * 
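+   * As a worked illustration (assuming the default p = 14 with p' = 25, q' = 6):
+   * if any of hashcode bits 14..24 is non-zero, the low 31 bits are returned
+   * unchanged with b = 0, and the register value is later recovered as
+   * Integer.numberOfTrailingZeros(encodedHash >>> p) + 1; if bits 14..24 are all
+   * zero, b is set to 1 and Long.numberOfTrailingZeros(hashcode >> p) + 1 is
+   * stored in the q' bits above the low p' bits.
+   * 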
+ * @param hashcode + * @return + */ + public int encodeHash(long hashcode) { + // x = p' - p + int x = (int) (hashcode & mask); + if (x == 0) { + // more bits should be considered for finding q (longest zero runs) + // set MSB to 1 + int ntr = Long.numberOfTrailingZeros(hashcode >> p) + 1; + long newHashCode = hashcode & pPrimeMask; + newHashCode |= ntr << pPrime; + newHashCode |= 0x80000000; + return (int) newHashCode; + } else { + // q is contained within p' - p + // set MSB to 0 + return (int) (hashcode & 0x7FFFFFFF); + } + } + + public int getSize() { + + // merge temp list before getting the size of sparse map + if (tempListIdx != 0) { + mergeTempListToSparseMap(); + } + return sparseMap.size(); + } + + public void merge(HLLRegister hllRegister) { + if (hllRegister instanceof HLLSparseRegister) { + HLLSparseRegister hsr = (HLLSparseRegister) hllRegister; + + // retain only the largest value for a register index + for (Map.Entry entry : hsr.getSparseMap().entrySet()) { + int key = entry.getKey(); + byte value = entry.getValue(); + set(key, value); + } + } else { + throw new IllegalArgumentException("Specified register not instance of HLLSparseRegister"); + } + } + + public boolean set(int key, byte value) { + boolean updated = false; + + // retain only the largest value for a register index + if (sparseMap.containsKey(key)) { + byte containedVal = sparseMap.get(key); + if (value > containedVal) { + sparseMap.put(key, value); + updated = true; + } + } else { + sparseMap.put(key, value); + updated = true; + } + return updated; + } + + public Int2ByteSortedMap getSparseMap() { + return sparseMap; + } + + public Int2ByteSortedMap getMergedSparseMap() { + if (tempListIdx != 0) { + mergeTempListToSparseMap(); + } + return sparseMap; + } + + public int getP() { + return p; + } + + public int getPPrime() { + return pPrime; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("HLLSparseRegister - "); + sb.append("p: "); + sb.append(p); + sb.append(" pPrime: "); + sb.append(pPrime); + sb.append(" qPrime: "); + sb.append(qPrime); + return sb.toString(); + } + + public String toExtendedString() { + return toString() + " register: " + sparseMap.toString(); + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof HLLSparseRegister)) { + return false; + } + HLLSparseRegister other = (HLLSparseRegister) obj; + boolean result = p == other.p && pPrime == other.pPrime && qPrime == other.qPrime + && tempListIdx == other.tempListIdx; + if (result) { + for (int i = 0; i < tempListIdx; i++) { + if (tempList[i] != other.tempList[i]) { + return false; + } + } + + result = result && sparseMap.equals(other.sparseMap); + } + return result; + } + + @Override + public int hashCode() { + int hashcode = 0; + hashcode += 31 * p; + hashcode += 31 * pPrime; + hashcode += 31 * qPrime; + for (int i = 0; i < tempListIdx; i++) { + hashcode += 31 * tempList[tempListIdx]; + } + hashcode += sparseMap.hashCode(); + return hashcode; + } + +} diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java new file mode 100644 index 0000000000..c08acf8a68 --- /dev/null +++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java @@ -0,0 +1,626 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.common.ndv.hll; + +import it.unimi.dsi.fastutil.doubles.Double2IntAVLTreeMap; +import it.unimi.dsi.fastutil.doubles.Double2IntSortedMap; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.Map; + +import org.apache.commons.codec.binary.Base64; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.type.HiveDecimal; +import org.apache.hadoop.hive.ql.util.JavaDataModel; +import org.apache.hadoop.io.Text; +import org.apache.hive.common.util.Murmur3; + +/** + *
+ * This is an implementation of the following variants of the hyperloglog (HLL)
+ * algorithm:
+ * Original  - Original HLL algorithm from Flajolet et al.,
+ *             http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf
+ * HLLNoBias - Google's implementation of bias correction based on lookup table
+ *             http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
+ * HLL++     - Google's implementation of HLL++ algorithm that uses SPARSE registers
+ *             http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/40671.pdf
+ *             
+ * Following are the constructor parameters that determine which algorithm is
+ * used
+ * numRegisterIndexBits - number of LSB hashcode bits to be used as register index.
+ *                        Default is 14. min = 4 and max = 16
+ * numHashBits - number of bits for hashcode. Default is 64. min = 32 and max = 128
+ * encoding - Type of encoding to use (SPARSE or DENSE). The algorithm automatically
+ *            switches to DENSE beyond a threshold. Default: SPARSE
+ * enableBitPacking - Whether to enable bit packing. Bit packing improves compression
+ *                    at the cost of more CPU cycles. Default: true
+ * noBias - Use Google's bias table lookup for short range bias correction.
+ *          Enabling this greatly improves the estimation accuracy for short
+ *          range values. Default: true
+ * 
+ * 
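+ * A minimal usage sketch of the API defined in this class (defaults as listed
+ * above; the estimate depends on the values added):
+ * 
+ *   HyperLogLog hll = HyperLogLog.builder().setNumRegisterIndexBits(14).build();
+ *   hll.addLong(42L);
+ *   hll.addString("hello");
+ *   long ndv = hll.estimateNumDistinctValues();
+ * 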
+ */ +public class HyperLogLog extends NumDistinctValueEstimator{ + private final static int DEFAULT_HASH_BITS = 64; + private final static long HASH64_ZERO = Murmur3.hash64(new byte[] {0}); + private final static long HASH64_ONE = Murmur3.hash64(new byte[] {1}); + private final static ByteBuffer SHORT_BUFFER = ByteBuffer.allocate(Short.BYTES); + private final static ByteBuffer INT_BUFFER = ByteBuffer.allocate(Integer.BYTES); + private final static ByteBuffer LONG_BUFFER = ByteBuffer.allocate(Long.BYTES); + + public enum EncodingType { + SPARSE, DENSE + } + + // number of bits to address registers + private final int p; + + // number of registers - 2^p + private final int m; + + // refer paper + private float alphaMM; + + // enable/disable bias correction using table lookup + private final boolean noBias; + + // enable/disable bitpacking + private final boolean bitPacking; + + // Not making it configurable for perf reasons (avoid checks) + private final int chosenHashBits = DEFAULT_HASH_BITS; + + private HLLDenseRegister denseRegister; + private HLLSparseRegister sparseRegister; + + // counts are cached to avoid repeated complex computation. If register value + // is updated the count will be computed again. + private long cachedCount; + private boolean invalidateCount; + + private EncodingType encoding; + + // threshold to switch from SPARSE to DENSE encoding + private int encodingSwitchThreshold; + + private HyperLogLog(HyperLogLogBuilder hllBuilder) { + if (hllBuilder.numRegisterIndexBits < HLLConstants.MIN_P_VALUE + || hllBuilder.numRegisterIndexBits > HLLConstants.MAX_P_VALUE) { + throw new IllegalArgumentException("p value should be between " + HLLConstants.MIN_P_VALUE + + " to " + HLLConstants.MAX_P_VALUE); + } + this.p = hllBuilder.numRegisterIndexBits; + this.m = 1 << p; + this.noBias = hllBuilder.noBias; + this.bitPacking = hllBuilder.bitPacking; + + // the threshold should be less than 12K bytes for p = 14. + // The reason to divide by 5 is, in sparse mode after serialization the + // entriesin sparse map are compressed, and delta encoded as varints. The + // worst case size of varints are 5 bytes. Hence, 12K/5 ~= 2400 entries in + // sparse map. + if (bitPacking) { + this.encodingSwitchThreshold = ((m * 6) / 8) / 5; + } else { + // if bitpacking is disabled, all register values takes 8 bits and hence + // we can be more flexible with the threshold. For p=14, 16K/5 = 3200 + // entries in sparse map can be allowed. 
+ this.encodingSwitchThreshold = m / 3; + } + + // initializeAlpha(DEFAULT_HASH_BITS); + // alphaMM value for 128 bits hash seems to perform better for default 64 hash bits + this.alphaMM = 0.7213f / (1 + 1.079f / m); + // For efficiency alpha is multiplied by m^2 + this.alphaMM = this.alphaMM * m * m; + + this.cachedCount = -1; + this.invalidateCount = false; + this.encoding = hllBuilder.encoding; + if (encoding.equals(EncodingType.SPARSE)) { + this.sparseRegister = new HLLSparseRegister(p, HLLConstants.P_PRIME_VALUE, + HLLConstants.Q_PRIME_VALUE); + this.denseRegister = null; + } else { + this.sparseRegister = null; + this.denseRegister = new HLLDenseRegister(p, bitPacking); + } + } + + public static HyperLogLogBuilder builder() { + return new HyperLogLogBuilder(); + } + + public static class HyperLogLogBuilder { + private int numRegisterIndexBits = 14; + private EncodingType encoding = EncodingType.SPARSE; + private boolean bitPacking = true; + private boolean noBias = true; + + public HyperLogLogBuilder() { + } + + public HyperLogLogBuilder setNumRegisterIndexBits(int b) { + this.numRegisterIndexBits = b; + return this; + } + + public HyperLogLogBuilder setEncoding(EncodingType enc) { + this.encoding = enc; + return this; + } + + public HyperLogLogBuilder enableBitPacking(boolean b) { + this.bitPacking = b; + return this; + } + + public HyperLogLogBuilder enableNoBias(boolean nb) { + this.noBias = nb; + return this; + } + + public HyperLogLog build() { + return new HyperLogLog(this); + } + } + + // see paper for alpha initialization. + private void initializeAlpha(final int hashBits) { + if (hashBits <= 16) { + alphaMM = 0.673f; + } else if (hashBits <= 32) { + alphaMM = 0.697f; + } else if (hashBits <= 64) { + alphaMM = 0.709f; + } else { + alphaMM = 0.7213f / (float) (1 + 1.079f / m); + } + + // For efficiency alpha is multiplied by m^2 + alphaMM = alphaMM * m * m; + } + + public void addBoolean(boolean val) { + add(val ? HASH64_ONE : HASH64_ZERO); + } + + public void addByte(byte val) { + add(Murmur3.hash64(new byte[] {val})); + } + + public void addBytes(byte[] val) { + add(Murmur3.hash64(val)); + } + + public void addShort(short val) { + SHORT_BUFFER.putShort(0, val); + add(Murmur3.hash64(SHORT_BUFFER.array())); + } + + public void addInt(int val) { + INT_BUFFER.putInt(0, val); + add(Murmur3.hash64(INT_BUFFER.array())); + } + + public void addLong(long val) { + LONG_BUFFER.putLong(0, val); + add(Murmur3.hash64(LONG_BUFFER.array())); + } + + public void addFloat(float val) { + INT_BUFFER.putFloat(0, val); + add(Murmur3.hash64(INT_BUFFER.array())); + } + + public void addDouble(double val) { + LONG_BUFFER.putDouble(0, val); + add(Murmur3.hash64(LONG_BUFFER.array())); + } + + public void addChar(char val) { + SHORT_BUFFER.putChar(0, val); + add(Murmur3.hash64(SHORT_BUFFER.array())); + } + + /** + * Java's default charset will be used for strings. 
+ * @param val + * - input string + */ + public void addString(String val) { + add(Murmur3.hash64(val.getBytes())); + } + + public void addString(String val, Charset charset) { + add(Murmur3.hash64(val.getBytes(charset))); + } + + public void add(long hashcode) { + if (encoding.equals(EncodingType.SPARSE)) { + if (sparseRegister.add(hashcode)) { + invalidateCount = true; + } + + // if size of sparse map excess the threshold convert the sparse map to + // dense register and switch to DENSE encoding + if (sparseRegister.getSize() > encodingSwitchThreshold) { + encoding = EncodingType.DENSE; + denseRegister = sparseToDenseRegister(sparseRegister); + sparseRegister = null; + invalidateCount = true; + } + } else { + if (denseRegister.add(hashcode)) { + invalidateCount = true; + } + } + } + + public long estimateNumDistinctValues() { + + // compute count only if the register values are updated else return the + // cached count + if (invalidateCount || cachedCount < 0) { + if (encoding.equals(EncodingType.SPARSE)) { + + // if encoding is still SPARSE use linear counting with increase + // accuracy (as we use pPrime bits for register index) + int mPrime = 1 << sparseRegister.getPPrime(); + cachedCount = linearCount(mPrime, mPrime - sparseRegister.getSize()); + } else { + + // for DENSE encoding, use bias table lookup for HLLNoBias algorithm + // else fallback to HLLOriginal algorithm + double sum = denseRegister.getSumInversePow2(); + long numZeros = denseRegister.getNumZeroes(); + + // cardinality estimate from normalized bias corrected harmonic mean on + // the registers + cachedCount = (long) (alphaMM * (1.0 / sum)); + long pow = (long) Math.pow(2, chosenHashBits); + + // when bias correction is enabled + if (noBias) { + cachedCount = cachedCount <= 5 * m ? (cachedCount - estimateBias(cachedCount)) + : cachedCount; + long h = cachedCount; + if (numZeros != 0) { + h = linearCount(m, numZeros); + } + + if (h < getThreshold()) { + cachedCount = h; + } + } else { + // HLL algorithm shows stronger bias for values in (2.5 * m) range. + // To compensate for this short range bias, linear counting is used + // for values before this short range. The original paper also says + // similar bias is seen for long range values due to hash collisions + // in range >1/30*(2^32). For the default case, we do not have to + // worry about this long range bias as the paper used 32-bit hashing + // and we use 64-bit hashing as default. 2^64 values are too high to + // observe long range bias (hash collisions). 
+ if (cachedCount <= 2.5 * m) { + + // for short range use linear counting + if (numZeros != 0) { + cachedCount = linearCount(m, numZeros); + } + } else if (chosenHashBits < 64 && cachedCount > (0.033333 * pow)) { + + // long range bias for 32-bit hashcodes + if (cachedCount > (1 / 30) * pow) { + cachedCount = (long) (-pow * Math.log(1.0 - (double) cachedCount / (double) pow)); + } + } + } + } + invalidateCount = false; + } + + return cachedCount; + } + + private long getThreshold() { + return (long) (HLLConstants.thresholdData[p - 4] + 0.5); + } + + /** + * Estimate bias from lookup table + * @param count + * - cardinality before bias correction + * @return cardinality after bias correction + */ + private long estimateBias(long count) { + double[] rawEstForP = HLLConstants.rawEstimateData[p - 4]; + + // compute distance and store it in sorted map + Double2IntSortedMap estIndexMap = new Double2IntAVLTreeMap(); + double distance = 0; + for (int i = 0; i < rawEstForP.length; i++) { + distance = Math.pow(count - rawEstForP[i], 2); + estIndexMap.put(distance, i); + } + + // take top-k closest neighbors and compute the bias corrected cardinality + long result = 0; + double[] biasForP = HLLConstants.biasData[p - 4]; + double biasSum = 0; + int kNeighbors = HLLConstants.K_NEAREST_NEIGHBOR; + for (Map.Entry entry : estIndexMap.entrySet()) { + biasSum += biasForP[entry.getValue()]; + kNeighbors--; + if (kNeighbors <= 0) { + break; + } + } + + // 0.5 added for rounding off + result = (long) ((biasSum / HLLConstants.K_NEAREST_NEIGHBOR) + 0.5); + return result; + } + + public void setCount(long count) { + this.cachedCount = count; + this.invalidateCount = true; + } + + private long linearCount(int mVal, long numZeros) { + return (long) (Math.round(mVal * Math.log(mVal / ((double) numZeros)))); + } + + // refer paper + public double getStandardError() { + return 1.04 / Math.sqrt(m); + } + + public HLLDenseRegister getHLLDenseRegister() { + return denseRegister; + } + + public HLLSparseRegister getHLLSparseRegister() { + return sparseRegister; + } + + /** + * Reconstruct sparse map from serialized integer list + * @param reg + * - uncompressed and delta decoded integer list + */ + public void setHLLSparseRegister(int[] reg) { + for (int i : reg) { + int key = i >>> HLLConstants.Q_PRIME_VALUE; + byte value = (byte) (i & 0x3f); + sparseRegister.set(key, value); + } + } + + /** + * Reconstruct dense registers from byte array + * @param reg + * - unpacked byte array + */ + public void setHLLDenseRegister(byte[] reg) { + int i = 0; + for (byte b : reg) { + denseRegister.set(i, b); + i++; + } + } + + /** + * Merge the specified hyperloglog to the current one. Encoding switches + * automatically after merge if the encoding switch threshold is exceeded. + * @param hll + * - hyperloglog to be merged + * @throws IllegalArgumentException + */ + public void merge(HyperLogLog hll) { + if (p != hll.p || chosenHashBits != hll.chosenHashBits) { + throw new IllegalArgumentException( + "HyperLogLog cannot be merged as either p or hashbits are different. 
Current: " + + toString() + " Provided: " + hll.toString()); + } + + EncodingType otherEncoding = hll.getEncoding(); + + if (encoding.equals(EncodingType.SPARSE) && otherEncoding.equals(EncodingType.SPARSE)) { + sparseRegister.merge(hll.getHLLSparseRegister()); + // if after merge the sparse switching threshold is exceeded then change + // to dense encoding + if (sparseRegister.getSize() > encodingSwitchThreshold) { + encoding = EncodingType.DENSE; + denseRegister = sparseToDenseRegister(sparseRegister); + sparseRegister = null; + } + } else if (encoding.equals(EncodingType.DENSE) && otherEncoding.equals(EncodingType.DENSE)) { + denseRegister.merge(hll.getHLLDenseRegister()); + } else if (encoding.equals(EncodingType.SPARSE) && otherEncoding.equals(EncodingType.DENSE)) { + denseRegister = sparseToDenseRegister(sparseRegister); + denseRegister.merge(hll.getHLLDenseRegister()); + sparseRegister = null; + encoding = EncodingType.DENSE; + } else if (encoding.equals(EncodingType.DENSE) && otherEncoding.equals(EncodingType.SPARSE)) { + HLLDenseRegister otherDenseRegister = sparseToDenseRegister(hll.getHLLSparseRegister()); + denseRegister.merge(otherDenseRegister); + } + + invalidateCount = true; + } + + /** + * Converts sparse to dense hll register + * @param sparseRegister + * - sparse register to be converted + * @return converted dense register + */ + private HLLDenseRegister sparseToDenseRegister(HLLSparseRegister sparseRegister) { + if (sparseRegister == null) { + return null; + } + int p = sparseRegister.getP(); + int pMask = (1 << p) - 1; + HLLDenseRegister result = new HLLDenseRegister(p, bitPacking); + for (Map.Entry entry : sparseRegister.getSparseMap().entrySet()) { + int key = entry.getKey(); + int idx = key & pMask; + result.set(idx, entry.getValue()); + } + return result; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Encoding: "); + sb.append(encoding); + sb.append(", p: "); + sb.append(p); + sb.append(", estimatedCardinality: "); + sb.append(estimateNumDistinctValues()); + return sb.toString(); + } + + public String toStringExtended() { + if (encoding.equals(EncodingType.DENSE)) { + return toString() + ", " + denseRegister.toExtendedString(); + } else if (encoding.equals(EncodingType.SPARSE)) { + return toString() + ", " + sparseRegister.toExtendedString(); + } + + return toString(); + } + + public int getNumRegisterIndexBits() { + return p; + } + + public EncodingType getEncoding() { + return encoding; + } + + public void setEncoding(EncodingType encoding) { + this.encoding = encoding; + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof HyperLogLog)) { + return false; + } + + HyperLogLog other = (HyperLogLog) obj; + long count = estimateNumDistinctValues(); + long otherCount = other.estimateNumDistinctValues(); + boolean result = p == other.p && chosenHashBits == other.chosenHashBits + && encoding.equals(other.encoding) && count == otherCount; + if (encoding.equals(EncodingType.DENSE)) { + result = result && denseRegister.equals(other.getHLLDenseRegister()); + } + + if (encoding.equals(EncodingType.SPARSE)) { + result = result && sparseRegister.equals(other.getHLLSparseRegister()); + } + return result; + } + + @Override + public int hashCode() { + int hashcode = 0; + hashcode += 31 * p; + hashcode += 31 * chosenHashBits; + hashcode += encoding.hashCode(); + hashcode += 31 * estimateNumDistinctValues(); + if (encoding.equals(EncodingType.DENSE)) { + hashcode += 31 * denseRegister.hashCode(); + } + 
+ if (encoding.equals(EncodingType.SPARSE)) { + hashcode += 31 * sparseRegister.hashCode(); + } + return hashcode; + } + + @Override + public void reset() { + // TODO Auto-generated method stub + + } + + @Override + public String serialize() { + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + // write bytes to bos ... + try { + HyperLogLogUtils.serializeHLL(bos, this); + } catch (IOException e) { + throw new RuntimeException(e); + } + return Base64.encodeBase64String(bos.toByteArray()); + } + + @Override + public NumDistinctValueEstimator deserialize(String s, int numBitVectors) { + InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s)); + try { + return HyperLogLogUtils.deserializeHLL(is); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + @Override + public void addToEstimator(long v) { + addLong(v); + } + + @Override + public void addToEstimator(String s) { + addString(s); + } + + @Override + public void addToEstimator(double d) { + addDouble(d); + } + + @Override + public void addToEstimator(HiveDecimal decimal) { + addDouble(decimal.doubleValue()); + } + + @Override + public void mergeEstimators(NumDistinctValueEstimator o) { + merge((HyperLogLog) o); + } + + @Override + public int lengthFor(JavaDataModel model) { + // 5 is the head, 1<

+ * |-4 byte-|------varlong----|varint (optional)|----------| + * --------------------------------------------------------- + * | header | estimated-count | register-length | register | + * --------------------------------------------------------- + * + * 4 byte header is encoded like below + * 3 bytes - HLL magic string to identify serialized stream + * 4 bits - p (number of bits to be used as register index) + * 1 - spare bit (not used) + * 3 bits - encoding (000 - sparse, 001..110 - n bit packing, 111 - no bit packing) + * + * Followed by header are 3 fields that are required for reconstruction + * of hyperloglog + * Estimated count - variable length long to store last computed estimated count. + * This is just for quick lookup without deserializing registers + * Register length - number of entries in the register (required only for + * for sparse representation. For bit-packing, the register + * length can be found from p) + * + * @param out + * - output stream to write to + * @param hll + * - hyperloglog that needs to be serialized + * @throws IOException + */ + public static void serializeHLL(OutputStream out, HyperLogLog hll) throws IOException { + + // write header + out.write(MAGIC); + int fourthByte = 0; + int p = hll.getNumRegisterIndexBits(); + fourthByte = (p & 0xff) << 4; + + int bitWidth = 0; + EncodingType enc = hll.getEncoding(); + + // determine bit width for bitpacking and encode it in header + if (enc.equals(EncodingType.DENSE)) { + int lzr = hll.getHLLDenseRegister().getMaxRegisterValue(); + bitWidth = getBitWidth(lzr); + + // the max value of number of zeroes for 64 bit hash can be encoded using + // only 6 bits. So we will disable bit packing for any values >6 + if (bitWidth > 6) { + fourthByte |= 7; + bitWidth = 8; + } else { + fourthByte |= (bitWidth & 7); + } + } + + // write fourth byte of header + out.write(fourthByte); + + // write estimated count + long estCount = hll.estimateNumDistinctValues(); + writeVulong(out, estCount); + + // serialize dense/sparse registers. Dense registers are bitpacked whereas + // sparse registers are delta and variable length encoded + if (enc.equals(EncodingType.DENSE)) { + byte[] register = hll.getHLLDenseRegister().getRegister(); + bitpackHLLRegister(out, register, bitWidth); + } else if (enc.equals(EncodingType.SPARSE)) { + Int2ByteSortedMap sparseMap = hll.getHLLSparseRegister().getSparseMap(); + + // write the number of elements in sparse map (required for + // reconstruction) + writeVulong(out, sparseMap.size()); + + // compute deltas and write the values as varints + int prev = 0; + for (Map.Entry entry : sparseMap.entrySet()) { + if (prev == 0) { + prev = (entry.getKey() << HLLConstants.Q_PRIME_VALUE) | entry.getValue(); + writeVulong(out, prev); + } else { + int curr = (entry.getKey() << HLLConstants.Q_PRIME_VALUE) | entry.getValue(); + int delta = curr - prev; + writeVulong(out, delta); + prev = curr; + } + } + } + } + + /** + * Refer serializeHLL() for format of serialization. 
This funtions + * deserializes the serialized hyperloglogs + * @param in + * - input stream + * @return deserialized hyperloglog + * @throws IOException + */ + public static HyperLogLog deserializeHLL(InputStream in) throws IOException { + checkMagicString(in); + int fourthByte = in.read() & 0xff; + int p = fourthByte >>> 4; + + // read type of encoding + int enc = fourthByte & 7; + EncodingType encoding = null; + int bitSize = 0; + if (enc == 0) { + encoding = EncodingType.SPARSE; + } else if (enc > 0 && enc < 7) { + bitSize = enc; + encoding = EncodingType.DENSE; + } else { + // bit packing disabled + bitSize = 8; + encoding = EncodingType.DENSE; + } + + // estimated count + long estCount = readVulong(in); + + HyperLogLog result = null; + if (encoding.equals(EncodingType.SPARSE)) { + result = HyperLogLog.builder().setNumRegisterIndexBits(p) + .setEncoding(EncodingType.SPARSE).build(); + int numRegisterEntries = (int) readVulong(in); + int[] reg = new int[numRegisterEntries]; + int prev = 0; + + // reconstruct the sparse map from delta encoded and varint input stream + if (numRegisterEntries > 0) { + prev = (int) readVulong(in); + reg[0] = prev; + } + int delta = 0; + int curr = 0; + for (int i = 1; i < numRegisterEntries; i++) { + delta = (int) readVulong(in); + curr = prev + delta; + reg[i] = curr; + prev = curr; + } + result.setHLLSparseRegister(reg); + } else { + + // explicitly disable bit packing + if (bitSize == 8) { + result = HyperLogLog.builder().setNumRegisterIndexBits(p) + .setEncoding(EncodingType.DENSE).enableBitPacking(false).build(); + } else { + result = HyperLogLog.builder().setNumRegisterIndexBits(p) + .setEncoding(EncodingType.DENSE).enableBitPacking(true).build(); + } + int m = 1 << p; + byte[] register = unpackHLLRegister(in, m, bitSize); + result.setHLLDenseRegister(register); + } + + result.setCount(estCount); + + return result; + } + + private static void bitpackHLLRegister(OutputStream out, byte[] register, int bitWidth) + throws IOException { + int bitsLeft = 8; + byte current = 0; + + if (bitWidth == 8) { + fastPathWrite(out, register); + return; + } + + // write the blob + for (byte value : register) { + int bitsToWrite = bitWidth; + while (bitsToWrite > bitsLeft) { + // add the bits to the bottom of the current word + current |= value >>> (bitsToWrite - bitsLeft); + // subtract out the bits we just added + bitsToWrite -= bitsLeft; + // zero out the bits above bitsToWrite + value &= (1 << bitsToWrite) - 1; + out.write(current); + current = 0; + bitsLeft = 8; + } + bitsLeft -= bitsToWrite; + current |= value << bitsLeft; + if (bitsLeft == 0) { + out.write(current); + current = 0; + bitsLeft = 8; + } + } + + out.flush(); + } + + private static void fastPathWrite(OutputStream out, byte[] register) throws IOException { + for (byte b : register) { + out.write(b); + } + } + + /** + * Unpack the bitpacked HyperLogLog register. 
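The space saving from the packing above is easy to quantify. The sketch below is illustrative only and not part of the patch: it repeats the minimum-bit-width calculation the serializer relies on (the private getBitWidth() helper appears further down) and shows how far a dense register of 2^p one-byte entries shrinks; p = 14 and a maximum register value of 17 are just sample inputs.

class HllPackingSketch {
  // Minimum number of bits needed to encode a register value, same idea as getBitWidth().
  static int bitWidth(int val) {
    int count = 0;
    while (val != 0) {
      count++;
      val >>>= 1;
    }
    return count;
  }

  public static void main(String[] args) {
    int p = 14;
    int m = 1 << p;                                 // 16384 one-byte registers when unpacked
    int maxRegisterValue = 17;                      // sample maximum register value
    int width = bitWidth(maxRegisterValue);         // 5 bits; packing stays enabled because 5 <= 6
    int packedBytes = (m * width + 7) / 8;          // 10240 bytes instead of 16384
    System.out.println(m + " bytes unpacked -> " + packedBytes + " bytes at " + width + " bits per register");
  }
}

unpackHLLRegister(), whose javadoc continues below, reverses this packing at read time.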
+ * @param in + * - input stream + * @param length + * - serialized length + * @return unpacked HLL register + * @throws IOException + */ + private static byte[] unpackHLLRegister(InputStream in, int length, int bitSize) + throws IOException { + int mask = (1 << bitSize) - 1; + int bitsLeft = 8; + + if (bitSize == 8) { + return fastPathRead(in, length); + } + + byte current = (byte) (0xff & in.read()); + + byte[] output = new byte[length]; + for (int i = 0; i < output.length; i++) { + byte result = 0; + int bitsLeftToRead = bitSize; + while (bitsLeftToRead > bitsLeft) { + result <<= bitsLeft; + result |= current & ((1 << bitsLeft) - 1); + bitsLeftToRead -= bitsLeft; + current = (byte) (0xff & in.read()); + bitsLeft = 8; + } + if (bitsLeftToRead > 0) { + result <<= bitsLeftToRead; + bitsLeft -= bitsLeftToRead; + result |= (current >>> bitsLeft) & ((1 << bitsLeftToRead) - 1); + } + output[i] = (byte) (result & mask); + } + return output; + } + + private static byte[] fastPathRead(InputStream in, int length) throws IOException { + byte[] result = new byte[length]; + for (int i = 0; i < length; i++) { + result[i] = (byte) in.read(); + } + return result; + } + + /** + * Get estimated cardinality without deserializing HLL + * @param in + * - serialized HLL + * @return - cardinality + * @throws IOException + */ + public static long getEstimatedCountFromSerializedHLL(InputStream in) throws IOException { + checkMagicString(in); + in.read(); + return readVulong(in); + } + + /** + * Check if the specified input stream is actually a HLL stream + * @param in + * - input stream + * @throws IOException + */ + private static void checkMagicString(InputStream in) throws IOException { + byte[] magic = new byte[3]; + magic[0] = (byte) in.read(); + magic[1] = (byte) in.read(); + magic[2] = (byte) in.read(); + + if (!Arrays.equals(magic, MAGIC)) { + throw new IllegalArgumentException("The input stream is not a HyperLogLog stream."); + } + } + + /** + * Minimum bits required to encode the specified value + * @param val + * - input value + * @return + */ + private static int getBitWidth(int val) { + int count = 0; + while (val != 0) { + count++; + val = (byte) (val >>> 1); + } + return count; + } + + /** + * Return relative error between actual and estimated cardinality + * @param actualCount + * - actual count + * @param estimatedCount + * - estimated count + * @return relative error + */ + public static float getRelativeError(long actualCount, long estimatedCount) { + float err = (1.0f - ((float) estimatedCount / (float) actualCount)) * 100.0f; + return err; + } + + /** + * Write variable length encoded longs to output stream + * @param output + * - out stream + * @param value + * - long + * @throws IOException + */ + private static void writeVulong(OutputStream output, long value) throws IOException { + while (true) { + if ((value & ~0x7f) == 0) { + output.write((byte) value); + return; + } else { + output.write((byte) (0x80 | (value & 0x7f))); + value >>>= 7; + } + } + } + + /** + * Read variable length encoded longs from input stream + * @param in + * - input stream + * @return decoded long value + * @throws IOException + */ + private static long readVulong(InputStream in) throws IOException { + long result = 0; + long b; + int offset = 0; + do { + b = in.read(); + if (b == -1) { + throw new EOFException("Reading Vulong past EOF"); + } + result |= (0x7f & b) << offset; + offset += 7; + } while (b >= 0x80); + return result; + } + +} diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 5700fb9325..1242d0e712 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1720,9 +1720,10 @@ private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal "This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed."), // standard error allowed for ndv estimates. A lower value indicates higher accuracy and a // higher compute cost. - HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0, + HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)-1.0, "Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost. \n" + - "A lower value for error indicates higher accuracy and a higher compute cost."), + "A positive value means FM-sketch is used while a negative value means HyperLogLog will be used. \n" + + "When a positive value is used, a lower error value indicates higher accuracy and a higher compute cost.\n"), HIVE_METASTORE_STATS_NDV_TUNER("hive.metastore.stats.ndv.tuner", (float)0.0, "Provides a tunable parameter between the lower bound and the higher bound of ndv for aggregate ndv across all the partitions. \n" + "The lower bound is equal to the maximum of ndv of all the partitions. The higher bound is equal to the sum of ndv of all the partitions.\n" + diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig deleted file mode 100644 index da48a7ccbd..0000000000 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java.orig +++ /dev/null @@ -1,4717 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - -package org.apache.hadoop.hive.conf; - -import com.google.common.base.Joiner; - -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.common.classification.InterfaceAudience; -import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate; -import org.apache.hadoop.hive.conf.Validator.PatternSet; -import org.apache.hadoop.hive.conf.Validator.RangeValidator; -import org.apache.hadoop.hive.conf.Validator.RatioValidator; -import org.apache.hadoop.hive.conf.Validator.SizeValidator; -import org.apache.hadoop.hive.conf.Validator.StringSet; -import org.apache.hadoop.hive.conf.Validator.TimeValidator; -import org.apache.hadoop.hive.conf.Validator.WritableDirectoryValidator; -import org.apache.hadoop.hive.shims.Utils; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.security.UserGroupInformation; -import org.apache.hadoop.util.Shell; -import org.apache.hive.common.HiveCompat; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import javax.security.auth.login.LoginException; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.PrintStream; -import java.io.UnsupportedEncodingException; -import java.net.URI; -import java.net.URL; -import java.net.URLDecoder; -import java.net.URLEncoder; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Properties; -import java.util.Set; -import java.util.concurrent.TimeUnit; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * Hive Configuration. 
- */ -public class HiveConf extends Configuration { - protected String hiveJar; - protected Properties origProp; - protected String auxJars; - private static final Logger l4j = LoggerFactory.getLogger(HiveConf.class); - private static boolean loadMetastoreConfig = false; - private static boolean loadHiveServer2Config = false; - private static URL hiveDefaultURL = null; - private static URL hiveSiteURL = null; - private static URL hivemetastoreSiteUrl = null; - private static URL hiveServer2SiteUrl = null; - - private static byte[] confVarByteArray = null; - - - private static final Map vars = new HashMap(); - private static final Map metaConfs = new HashMap(); - private final List restrictList = new ArrayList(); - private final Set hiddenSet = new HashSet(); - - private Pattern modWhiteListPattern = null; - private volatile boolean isSparkConfigUpdated = false; - private static final int LOG_PREFIX_LENGTH = 64; - - public boolean getSparkConfigUpdated() { - return isSparkConfigUpdated; - } - - public void setSparkConfigUpdated(boolean isSparkConfigUpdated) { - this.isSparkConfigUpdated = isSparkConfigUpdated; - } - - public interface EncoderDecoder { - V encode(K key); - K decode(V value); - } - - public static class URLEncoderDecoder implements EncoderDecoder { - private static final String UTF_8 = "UTF-8"; - @Override - public String encode(String key) { - try { - return URLEncoder.encode(key, UTF_8); - } catch (UnsupportedEncodingException e) { - return key; - } - } - - @Override - public String decode(String value) { - try { - return URLDecoder.decode(value, UTF_8); - } catch (UnsupportedEncodingException e) { - return value; - } - } - } - public static class EncoderDecoderFactory { - public static final URLEncoderDecoder URL_ENCODER_DECODER = new URLEncoderDecoder(); - } - - static { - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); - if (classLoader == null) { - classLoader = HiveConf.class.getClassLoader(); - } - - hiveDefaultURL = classLoader.getResource("hive-default.xml"); - - // Look for hive-site.xml on the CLASSPATH and log its location if found. 
- hiveSiteURL = findConfigFile(classLoader, "hive-site.xml", true); - hivemetastoreSiteUrl = findConfigFile(classLoader, "hivemetastore-site.xml", false); - hiveServer2SiteUrl = findConfigFile(classLoader, "hiveserver2-site.xml", false); - - for (ConfVars confVar : ConfVars.values()) { - vars.put(confVar.varname, confVar); - } - - Set llapDaemonConfVarsSetLocal = new LinkedHashSet<>(); - populateLlapDaemonVarsSet(llapDaemonConfVarsSetLocal); - llapDaemonVarsSet = Collections.unmodifiableSet(llapDaemonConfVarsSetLocal); - } - - private static URL findConfigFile(ClassLoader classLoader, String name, boolean doLog) { - URL result = classLoader.getResource(name); - if (result == null) { - String confPath = System.getenv("HIVE_CONF_DIR"); - result = checkConfigFile(new File(confPath, name)); - if (result == null) { - String homePath = System.getenv("HIVE_HOME"); - String nameInConf = "conf" + File.pathSeparator + name; - result = checkConfigFile(new File(homePath, nameInConf)); - if (result == null) { - URI jarUri = null; - try { - jarUri = HiveConf.class.getProtectionDomain().getCodeSource().getLocation().toURI(); - } catch (Throwable e) { - if (l4j.isInfoEnabled()) { - l4j.info("Cannot get jar URI", e); - } - System.err.println("Cannot get jar URI: " + e.getMessage()); - } - result = checkConfigFile(new File(new File(jarUri).getParentFile(), nameInConf)); - } - } - } - if (doLog && l4j.isInfoEnabled()) { - l4j.info("Found configuration file " + result); - } - return result; - } - - private static URL checkConfigFile(File f) { - try { - return (f.exists() && f.isFile()) ? f.toURI().toURL() : null; - } catch (Throwable e) { - if (l4j.isInfoEnabled()) { - l4j.info("Error looking for config " + f, e); - } - System.err.println("Error looking for config " + f + ": " + e.getMessage()); - return null; - } - } - - - - - @InterfaceAudience.Private - public static final String PREFIX_LLAP = "llap."; - @InterfaceAudience.Private - public static final String PREFIX_HIVE_LLAP = "hive.llap."; - - /** - * Metastore related options that the db is initialized against. When a conf - * var in this is list is changed, the metastore instance for the CLI will - * be recreated so that the change will take effect. 
- */ - public static final HiveConf.ConfVars[] metaVars = { - HiveConf.ConfVars.METASTOREWAREHOUSE, - HiveConf.ConfVars.REPLDIR, - HiveConf.ConfVars.METASTOREURIS, - HiveConf.ConfVars.METASTORE_SERVER_PORT, - HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES, - HiveConf.ConfVars.METASTORETHRIFTFAILURERETRIES, - HiveConf.ConfVars.METASTORE_CLIENT_CONNECT_RETRY_DELAY, - HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, - HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_LIFETIME, - HiveConf.ConfVars.METASTOREPWD, - HiveConf.ConfVars.METASTORECONNECTURLHOOK, - HiveConf.ConfVars.METASTORECONNECTURLKEY, - HiveConf.ConfVars.METASTORESERVERMINTHREADS, - HiveConf.ConfVars.METASTORESERVERMAXTHREADS, - HiveConf.ConfVars.METASTORE_TCP_KEEP_ALIVE, - HiveConf.ConfVars.METASTORE_INT_ORIGINAL, - HiveConf.ConfVars.METASTORE_INT_ARCHIVED, - HiveConf.ConfVars.METASTORE_INT_EXTRACTED, - HiveConf.ConfVars.METASTORE_KERBEROS_KEYTAB_FILE, - HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL, - HiveConf.ConfVars.METASTORE_USE_THRIFT_SASL, - HiveConf.ConfVars.METASTORE_TOKEN_SIGNATURE, - HiveConf.ConfVars.METASTORE_CACHE_PINOBJTYPES, - HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE, - HiveConf.ConfVars.METASTORE_VALIDATE_TABLES, - HiveConf.ConfVars.METASTORE_DATANUCLEUS_INIT_COL_INFO, - HiveConf.ConfVars.METASTORE_VALIDATE_COLUMNS, - HiveConf.ConfVars.METASTORE_VALIDATE_CONSTRAINTS, - HiveConf.ConfVars.METASTORE_STORE_MANAGER_TYPE, - HiveConf.ConfVars.METASTORE_AUTO_CREATE_ALL, - HiveConf.ConfVars.METASTORE_TRANSACTION_ISOLATION, - HiveConf.ConfVars.METASTORE_CACHE_LEVEL2, - HiveConf.ConfVars.METASTORE_CACHE_LEVEL2_TYPE, - HiveConf.ConfVars.METASTORE_IDENTIFIER_FACTORY, - HiveConf.ConfVars.METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK, - HiveConf.ConfVars.METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS, - HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_MAX, - HiveConf.ConfVars.METASTORE_EVENT_LISTENERS, - HiveConf.ConfVars.METASTORE_TRANSACTIONAL_EVENT_LISTENERS, - HiveConf.ConfVars.METASTORE_EVENT_CLEAN_FREQ, - HiveConf.ConfVars.METASTORE_EVENT_EXPIRY_DURATION, - HiveConf.ConfVars.METASTORE_EVENT_MESSAGE_FACTORY, - HiveConf.ConfVars.METASTORE_FILTER_HOOK, - HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL, - HiveConf.ConfVars.METASTORE_END_FUNCTION_LISTENERS, - HiveConf.ConfVars.METASTORE_PART_INHERIT_TBL_PROPS, - HiveConf.ConfVars.METASTORE_BATCH_RETRIEVE_OBJECTS_MAX, - HiveConf.ConfVars.METASTORE_INIT_HOOKS, - HiveConf.ConfVars.METASTORE_PRE_EVENT_LISTENERS, - HiveConf.ConfVars.HMSHANDLERATTEMPTS, - HiveConf.ConfVars.HMSHANDLERINTERVAL, - HiveConf.ConfVars.HMSHANDLERFORCERELOADCONF, - HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN, - HiveConf.ConfVars.METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS, - HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES, - HiveConf.ConfVars.USERS_IN_ADMIN_ROLE, - HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER, - HiveConf.ConfVars.HIVE_TXN_MANAGER, - HiveConf.ConfVars.HIVE_TXN_TIMEOUT, - HiveConf.ConfVars.HIVE_TXN_OPERATIONAL_PROPERTIES, - HiveConf.ConfVars.HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE, - HiveConf.ConfVars.HIVE_TXN_MAX_OPEN_BATCH, - HiveConf.ConfVars.HIVE_TXN_RETRYABLE_SQLEX_REGEX, - HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER, - HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_ENABLED, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_FPP, - 
HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_TTL, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL, - HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL, - HiveConf.ConfVars.METASTORE_FASTPATH, - HiveConf.ConfVars.METASTORE_HBASE_CATALOG_CACHE_SIZE, - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE, - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS, - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY, - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE, - HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE, - HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT, - HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT, - HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_FULL, - HiveConf.ConfVars.METASTORE_HBASE_CACHE_CLEAN_UNTIL, - HiveConf.ConfVars.METASTORE_HBASE_CONNECTION_CLASS, - HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES, - HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_MEMORY_TTL, - HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY, - HiveConf.ConfVars.METASTORE_HBASE_AGGR_STATS_HBASE_TTL, - HiveConf.ConfVars.METASTORE_HBASE_FILE_METADATA_THREADS - }; - - /** - * User configurable Metastore vars - */ - public static final HiveConf.ConfVars[] metaConfVars = { - HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL, - HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL_DDL, - HiveConf.ConfVars.METASTORE_CLIENT_SOCKET_TIMEOUT, - HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN, - HiveConf.ConfVars.METASTORE_CAPABILITY_CHECK - }; - - static { - for (ConfVars confVar : metaConfVars) { - metaConfs.put(confVar.varname, confVar); - } - } - - public static final String HIVE_LLAP_DAEMON_SERVICE_PRINCIPAL_NAME = "hive.llap.daemon.service.principal"; - public static final String HIVE_SERVER2_AUTHENTICATION_LDAP_USERMEMBERSHIPKEY_NAME = - "hive.server2.authentication.ldap.userMembershipKey"; - - /** - * dbVars are the parameters can be set per database. If these - * parameters are set as a database property, when switching to that - * database, the HiveConf variable will be changed. The change of these - * parameters will effectively change the DFS and MapReduce clusters - * for different databases. - */ - public static final HiveConf.ConfVars[] dbVars = { - HiveConf.ConfVars.HADOOPBIN, - HiveConf.ConfVars.METASTOREWAREHOUSE, - HiveConf.ConfVars.SCRATCHDIR - }; - - /** - * Variables used by LLAP daemons. - * TODO: Eventually auto-populate this based on prefixes. The conf variables - * will need to be renamed for this. 
- */ - private static final Set llapDaemonVarsSet; - - private static void populateLlapDaemonVarsSet(Set llapDaemonVarsSetLocal) { - llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_ENABLED.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_MEMORY_MODE.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_MIN_ALLOC.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_MAX_ALLOC.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_ARENA_COUNT.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_MEMORY_MAX_SIZE.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOCATOR_DIRECT.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_USE_LRFU.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_LRFU_LAMBDA.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_USE_FILEID_PATH.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_DECODING_METRICS_PERCENTILE_INTERVALS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ORC_ENABLE_TIME_COUNTERS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_IO_THREADPOOL_SIZE.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_KERBEROS_PRINCIPAL.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_KERBEROS_KEYTAB_FILE.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_KERBEROS_PRINCIPAL.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_KERBEROS_KEYTAB_FILE.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ZKSM_ZK_CONNECTION_STRING.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_SECURITY_ACL.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_ACL.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_SECURITY_ACL_DENY.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_ACL_DENY.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DELEGATION_TOKEN_LIFETIME.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_MANAGEMENT_RPC_PORT.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_WEB_AUTO_AUTH.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_RPC_NUM_HANDLERS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WORK_DIRS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_YARN_SHUFFLE_PORT.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_YARN_CONTAINER_MB.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SHUFFLE_DIR_WATCHER_ENABLED.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_HEARTBEAT_INTERVAL_MS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_CONNECTION_TIMEOUT_MS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_LIVENESS_CONNECTION_SLEEP_BETWEEN_RETRIES_MS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_NUM_EXECUTORS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_RPC_PORT.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_XMX_HEADROOM.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_NUM_FILE_CLEANER_THREADS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_FILE_CLEANUP_DELAY_SECONDS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SERVICE_HOSTS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_SERVICE_REFRESH_INTERVAL.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_ALLOW_PERMANENT_FNS.varname); - 
llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_DOWNLOAD_PERMANENT_FNS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_SCHEDULER_WAIT_QUEUE_SIZE.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WAIT_QUEUE_COMPARATOR_CLASS_NAME.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_SCHEDULER_ENABLE_PREEMPTION.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_TASK_PREEMPTION_METRICS_INTERVALS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WEB_PORT.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_WEB_SSL.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_CONTAINER_ID.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_VALIDATE_ACLS.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_LOGGER.varname); - llapDaemonVarsSetLocal.add(ConfVars.LLAP_DAEMON_AM_USE_FQDN.varname); - } - - /** - * Get a set containing configuration parameter names used by LLAP Server isntances - * @return an unmodifiable set containing llap ConfVars - */ - public static final Set getLlapDaemonConfVars() { - return llapDaemonVarsSet; - } - - - /** - * ConfVars. - * - * These are the default configuration properties for Hive. Each HiveConf - * object is initialized as follows: - * - * 1) Hadoop configuration properties are applied. - * 2) ConfVar properties with non-null values are overlayed. - * 3) hive-site.xml properties are overlayed. - * - * WARNING: think twice before adding any Hadoop configuration properties - * with non-null values to this list as they will override any values defined - * in the underlying Hadoop configuration. - */ - public static enum ConfVars { - // QL execution stuff - SCRIPTWRAPPER("hive.exec.script.wrapper", null, ""), - PLAN("hive.exec.plan", "", ""), - STAGINGDIR("hive.exec.stagingdir", ".hive-staging", - "Directory name that will be created inside table locations in order to support HDFS encryption. " + - "This is replaces ${hive.exec.scratchdir} for query results with the exception of read-only tables. " + - "In all cases ${hive.exec.scratchdir} is still used for other temporary files, such as job plans."), - SCRATCHDIR("hive.exec.scratchdir", "/tmp/hive", - "HDFS root scratch dir for Hive jobs which gets created with write all (733) permission. 
" + - "For each connecting user, an HDFS scratch dir: ${hive.exec.scratchdir}/ is created, " + - "with ${hive.scratch.dir.permission}."), - REPLDIR("hive.repl.rootdir","/user/hive/repl/", - "HDFS root dir for all replication dumps."), - REPLCMENABLED("hive.repl.cm.enabled", false, - "Turn on ChangeManager, so delete files will go to cmrootdir."), - REPLCMDIR("hive.repl.cmrootdir","/user/hive/cmroot/", - "Root dir for ChangeManager, used for deleted files."), - REPLCMRETIAN("hive.repl.cm.retain","24h", - new TimeValidator(TimeUnit.HOURS), - "Time to retain removed files in cmrootdir."), - REPLCMINTERVAL("hive.repl.cm.interval","3600s", - new TimeValidator(TimeUnit.SECONDS), - "Inteval for cmroot cleanup thread."), - REPL_FUNCTIONS_ROOT_DIR("hive.repl.replica.functions.root.dir","/user/hive/repl/functions/", - "Root directory on the replica warehouse where the repl sub-system will store jars from the primary warehouse"), - LOCALSCRATCHDIR("hive.exec.local.scratchdir", - "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", - "Local scratch space for Hive jobs"), - DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir", - "${system:java.io.tmpdir}" + File.separator + "${hive.session.id}_resources", - "Temporary local directory for added resources in the remote file system."), - SCRATCHDIRPERMISSION("hive.scratch.dir.permission", "700", - "The permission for the user specific scratch directories that get created."), - SUBMITVIACHILD("hive.exec.submitviachild", false, ""), - SUBMITLOCALTASKVIACHILD("hive.exec.submit.local.task.via.child", true, - "Determines whether local tasks (typically mapjoin hashtable generation phase) runs in \n" + - "separate JVM (true recommended) or not. \n" + - "Avoids the overhead of spawning new JVM, but can lead to out-of-memory issues."), - SCRIPTERRORLIMIT("hive.exec.script.maxerrsize", 100000, - "Maximum number of bytes a script is allowed to emit to standard error (per map-reduce task). \n" + - "This prevents runaway scripts from filling logs partitions to capacity"), - ALLOWPARTIALCONSUMP("hive.exec.script.allow.partial.consumption", false, - "When enabled, this option allows a user script to exit successfully without consuming \n" + - "all the data from the standard input."), - STREAMREPORTERPERFIX("stream.stderr.reporter.prefix", "reporter:", - "Streaming jobs that log to standard error with this prefix can log counter or status information."), - STREAMREPORTERENABLED("stream.stderr.reporter.enabled", true, - "Enable consumption of status and counter messages for streaming jobs."), - COMPRESSRESULT("hive.exec.compress.output", false, - "This controls whether the final outputs of a query (to a local/HDFS file or a Hive table) is compressed. \n" + - "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"), - COMPRESSINTERMEDIATE("hive.exec.compress.intermediate", false, - "This controls whether intermediate files produced by Hive between multiple map-reduce jobs are compressed. 
\n" + - "The compression codec and other options are determined from Hadoop config variables mapred.output.compress*"), - COMPRESSINTERMEDIATECODEC("hive.intermediate.compression.codec", "", ""), - COMPRESSINTERMEDIATETYPE("hive.intermediate.compression.type", "", ""), - BYTESPERREDUCER("hive.exec.reducers.bytes.per.reducer", (long) (256 * 1000 * 1000), - "size per reducer.The default is 256Mb, i.e if the input size is 1G, it will use 4 reducers."), - MAXREDUCERS("hive.exec.reducers.max", 1009, - "max number of reducers will be used. If the one specified in the configuration parameter mapred.reduce.tasks is\n" + - "negative, Hive will use this one as the max number of reducers when automatically determine number of reducers."), - PREEXECHOOKS("hive.exec.pre.hooks", "", - "Comma-separated list of pre-execution hooks to be invoked for each statement. \n" + - "A pre-execution hook is specified as the name of a Java class which implements the \n" + - "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), - POSTEXECHOOKS("hive.exec.post.hooks", "", - "Comma-separated list of post-execution hooks to be invoked for each statement. \n" + - "A post-execution hook is specified as the name of a Java class which implements the \n" + - "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), - ONFAILUREHOOKS("hive.exec.failure.hooks", "", - "Comma-separated list of on-failure hooks to be invoked for each statement. \n" + - "An on-failure hook is specified as the name of Java class which implements the \n" + - "org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext interface."), - QUERYREDACTORHOOKS("hive.exec.query.redactor.hooks", "", - "Comma-separated list of hooks to be invoked for each query which can \n" + - "tranform the query before it's placed in the job.xml file. Must be a Java class which \n" + - "extends from the org.apache.hadoop.hive.ql.hooks.Redactor abstract class."), - CLIENTSTATSPUBLISHERS("hive.client.stats.publishers", "", - "Comma-separated list of statistics publishers to be invoked on counters on each job. \n" + - "A client stats publisher is specified as the name of a Java class which implements the \n" + - "org.apache.hadoop.hive.ql.stats.ClientStatsPublisher interface."), - ATSHOOKQUEUECAPACITY("hive.ats.hook.queue.capacity", 64, - "Queue size for the ATS Hook executor. If the number of outstanding submissions \n" + - "to the ATS executor exceed this amount, the Hive ATS Hook will not try to log queries to ATS."), - EXECPARALLEL("hive.exec.parallel", false, "Whether to execute jobs in parallel"), - EXECPARALLETHREADNUMBER("hive.exec.parallel.thread.number", 8, - "How many jobs at most can be executed in parallel"), - HIVESPECULATIVEEXECREDUCERS("hive.mapred.reduce.tasks.speculative.execution", true, - "Whether speculative execution for reducers should be turned on. "), - HIVECOUNTERSPULLINTERVAL("hive.exec.counters.pull.interval", 1000L, - "The interval with which to poll the JobTracker for the counters the running job. 
\n" + - "The smaller it is the more load there will be on the jobtracker, the higher it is the less granular the caught will be."), - DYNAMICPARTITIONING("hive.exec.dynamic.partition", true, - "Whether or not to allow dynamic partitions in DML/DDL."), - DYNAMICPARTITIONINGMODE("hive.exec.dynamic.partition.mode", "strict", - "In strict mode, the user must specify at least one static partition\n" + - "in case the user accidentally overwrites all partitions.\n" + - "In nonstrict mode all partitions are allowed to be dynamic."), - DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000, - "Maximum number of dynamic partitions allowed to be created in total."), - DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100, - "Maximum number of dynamic partitions allowed to be created in each mapper/reducer node."), - MAXCREATEDFILES("hive.exec.max.created.files", 100000L, - "Maximum number of HDFS files created by all mappers/reducers in a MapReduce job."), - DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__", - "The default partition name in case the dynamic partition column value is null/empty string or any other values that cannot be escaped. \n" + - "This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). \n" + - "The user has to be aware that the dynamic partition value should not contain this value to avoid confusions."), - DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__", ""), - - // Whether to show a link to the most failed task + debugging tips - SHOW_JOB_FAIL_DEBUG_INFO("hive.exec.show.job.failure.debug.info", true, - "If a job fails, whether to provide a link in the CLI to the task with the\n" + - "most failures, along with debugging hints if applicable."), - JOB_DEBUG_CAPTURE_STACKTRACES("hive.exec.job.debug.capture.stacktraces", true, - "Whether or not stack traces parsed from the task logs of a sampled failed task \n" + - "for each failed job should be stored in the SessionState"), - JOB_DEBUG_TIMEOUT("hive.exec.job.debug.timeout", 30000, ""), - TASKLOG_DEBUG_TIMEOUT("hive.exec.tasklog.debug.timeout", 20000, ""), - OUTPUT_FILE_EXTENSION("hive.output.file.extension", null, - "String used as a file extension for output files. \n" + - "If not set, defaults to the codec extension for text files (e.g. \".gz\"), or no extension otherwise."), - - HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test mode", true), - HIVE_IN_TEST_SHORT_LOGS("hive.in.test.short.logs", false, - "internal usage only, used only in test mode. If set true, when requesting the " + - "operation logs the short version (generated by LogDivertAppenderForTest) will be " + - "returned"), - HIVE_IN_TEST_REMOVE_LOGS("hive.in.test.remove.logs", true, - "internal usage only, used only in test mode. 
If set false, the operation logs, and the " + - "operation log directory will not be removed, so they can be found after the test runs."), - - HIVE_IN_TEZ_TEST("hive.in.tez.test", false, "internal use only, true when in testing tez", - true), - - LOCALMODEAUTO("hive.exec.mode.local.auto", false, - "Let Hive determine whether to run in local mode automatically"), - LOCALMODEMAXBYTES("hive.exec.mode.local.auto.inputbytes.max", 134217728L, - "When hive.exec.mode.local.auto is true, input bytes should less than this for local mode."), - LOCALMODEMAXINPUTFILES("hive.exec.mode.local.auto.input.files.max", 4, - "When hive.exec.mode.local.auto is true, the number of tasks should less than this for local mode."), - - DROPIGNORESNONEXISTENT("hive.exec.drop.ignorenonexistent", true, - "Do not report an error if DROP TABLE/VIEW/Index/Function specifies a non-existent table/view/index/function"), - - HIVEIGNOREMAPJOINHINT("hive.ignore.mapjoin.hint", true, "Ignore the mapjoin hint"), - - HIVE_FILE_MAX_FOOTER("hive.file.max.footer", 100, - "maximum number of lines for footer user can define for a table file"), - - HIVE_RESULTSET_USE_UNIQUE_COLUMN_NAMES("hive.resultset.use.unique.column.names", true, - "Make column names unique in the result set by qualifying column names with table alias if needed.\n" + - "Table alias will be added to column names for queries of type \"select *\" or \n" + - "if query explicitly uses table alias \"select r1.x..\"."), - - // Hadoop Configuration Properties - // Properties with null values are ignored and exist only for the purpose of giving us - // a symbolic name to reference in the Hive source code. Properties with non-null - // values will override any values set in the underlying Hadoop configuration. - HADOOPBIN("hadoop.bin.path", findHadoopBinary(), "", true), - YARNBIN("yarn.bin.path", findYarnBinary(), "", true), - HIVE_FS_HAR_IMPL("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem", - "The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop versions less than 0.20"), - MAPREDMAXSPLITSIZE(FileInputFormat.SPLIT_MAXSIZE, 256000000L, "", true), - MAPREDMINSPLITSIZE(FileInputFormat.SPLIT_MINSIZE, 1L, "", true), - MAPREDMINSPLITSIZEPERNODE(CombineFileInputFormat.SPLIT_MINSIZE_PERNODE, 1L, "", true), - MAPREDMINSPLITSIZEPERRACK(CombineFileInputFormat.SPLIT_MINSIZE_PERRACK, 1L, "", true), - // The number of reduce tasks per job. Hadoop sets this value to 1 by default - // By setting this property to -1, Hive will automatically determine the correct - // number of reducers. - HADOOPNUMREDUCERS("mapreduce.job.reduces", -1, "", true), - - // Metastore stuff. Be sure to update HiveConf.metaVars when you add something here! - METASTOREDBTYPE("hive.metastore.db.type", "DERBY", new StringSet("DERBY", "ORACLE", "MYSQL", "MSSQL", "POSTGRES"), - "Type of database used by the metastore. Information schema & JDBCStorageHandler depend on it."), - METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse", - "location of default database for the warehouse"), - METASTOREURIS("hive.metastore.uris", "", - "Thrift URI for the remote metastore. Used by metastore client to connect to remote metastore."), - - METASTORE_CAPABILITY_CHECK("hive.metastore.client.capability.check", true, - "Whether to check client capabilities for potentially breaking API usage."), - METASTORE_FASTPATH("hive.metastore.fastpath", false, - "Used to avoid all of the proxies and object copies in the metastore. 
Note, if this is " + - "set, you MUST use a local metastore (hive.metastore.uris must be empty) otherwise " + - "undefined and most likely undesired behavior will result"), - METASTORE_FS_HANDLER_THREADS_COUNT("hive.metastore.fshandler.threads", 15, - "Number of threads to be allocated for metastore handler for fs operations."), - METASTORE_HBASE_CATALOG_CACHE_SIZE("hive.metastore.hbase.catalog.cache.size", 50000, "Maximum number of " + - "objects we will place in the hbase metastore catalog cache. The objects will be divided up by " + - "types that we need to cache."), - METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.hbase.aggregate.stats.cache.size", 10000, - "Maximum number of aggregate stats nodes that we will place in the hbase metastore aggregate stats cache."), - METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.hbase.aggregate.stats.max.partitions", 10000, - "Maximum number of partitions that are aggregated per cache node."), - METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY("hive.metastore.hbase.aggregate.stats.false.positive.probability", - (float) 0.01, "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."), - METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.hbase.aggregate.stats.max.variance", (float) 0.1, - "Maximum tolerable variance in number of partitions between a cached node and our request (default 10%)."), - METASTORE_HBASE_CACHE_TIME_TO_LIVE("hive.metastore.hbase.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS), - "Number of seconds for a cached node to be active in the cache before they become stale."), - METASTORE_HBASE_CACHE_MAX_WRITER_WAIT("hive.metastore.hbase.cache.max.writer.wait", "5000ms", new TimeValidator(TimeUnit.MILLISECONDS), - "Number of milliseconds a writer will wait to acquire the writelock before giving up."), - METASTORE_HBASE_CACHE_MAX_READER_WAIT("hive.metastore.hbase.cache.max.reader.wait", "1000ms", new TimeValidator(TimeUnit.MILLISECONDS), - "Number of milliseconds a reader will wait to acquire the readlock before giving up."), - METASTORE_HBASE_CACHE_MAX_FULL("hive.metastore.hbase.cache.max.full", (float) 0.9, - "Maximum cache full % after which the cache cleaner thread kicks in."), - METASTORE_HBASE_CACHE_CLEAN_UNTIL("hive.metastore.hbase.cache.clean.until", (float) 0.8, - "The cleaner thread cleans until cache reaches this % full size."), - METASTORE_HBASE_CONNECTION_CLASS("hive.metastore.hbase.connection.class", - "org.apache.hadoop.hive.metastore.hbase.VanillaHBaseConnection", - "Class used to connection to HBase"), - METASTORE_HBASE_AGGR_STATS_CACHE_ENTRIES("hive.metastore.hbase.aggr.stats.cache.entries", - 10000, "How many in stats objects to cache in memory"), - METASTORE_HBASE_AGGR_STATS_MEMORY_TTL("hive.metastore.hbase.aggr.stats.memory.ttl", "60s", - new TimeValidator(TimeUnit.SECONDS), - "Number of seconds stats objects live in memory after they are read from HBase."), - METASTORE_HBASE_AGGR_STATS_INVALIDATOR_FREQUENCY( - "hive.metastore.hbase.aggr.stats.invalidator.frequency", "5s", - new TimeValidator(TimeUnit.SECONDS), - "How often the stats cache scans its HBase entries and looks for expired entries"), - METASTORE_HBASE_AGGR_STATS_HBASE_TTL("hive.metastore.hbase.aggr.stats.hbase.ttl", "604800s", - new TimeValidator(TimeUnit.SECONDS), - "Number of seconds stats entries live in HBase cache after they are created. They may be" + - " invalided by updates or partition drops before this. 
Default is one week."), - METASTORE_HBASE_FILE_METADATA_THREADS("hive.metastore.hbase.file.metadata.threads", 1, - "Number of threads to use to read file metadata in background to cache it."), - - METASTORETHRIFTCONNECTIONRETRIES("hive.metastore.connect.retries", 3, - "Number of retries while opening a connection to metastore"), - METASTORETHRIFTFAILURERETRIES("hive.metastore.failure.retries", 1, - "Number of retries upon failure of Thrift metastore calls"), - METASTORE_SERVER_PORT("hive.metastore.port", 9083, "Hive metastore listener port"), - METASTORE_CLIENT_CONNECT_RETRY_DELAY("hive.metastore.client.connect.retry.delay", "1s", - new TimeValidator(TimeUnit.SECONDS), - "Number of seconds for the client to wait between consecutive connection attempts"), - METASTORE_CLIENT_SOCKET_TIMEOUT("hive.metastore.client.socket.timeout", "600s", - new TimeValidator(TimeUnit.SECONDS), - "MetaStore Client socket timeout in seconds"), - METASTORE_CLIENT_SOCKET_LIFETIME("hive.metastore.client.socket.lifetime", "0s", - new TimeValidator(TimeUnit.SECONDS), - "MetaStore Client socket lifetime in seconds. After this time is exceeded, client\n" + - "reconnects on the next MetaStore operation. A value of 0s means the connection\n" + - "has an infinite lifetime."), - METASTOREPWD("javax.jdo.option.ConnectionPassword", "mine", - "password to use against metastore database"), - METASTORECONNECTURLHOOK("hive.metastore.ds.connection.url.hook", "", - "Name of the hook to use for retrieving the JDO connection URL. If empty, the value in javax.jdo.option.ConnectionURL is used"), - METASTOREMULTITHREADED("javax.jdo.option.Multithreaded", true, - "Set this to true if multiple threads access metastore through JDO concurrently."), - METASTORECONNECTURLKEY("javax.jdo.option.ConnectionURL", - "jdbc:derby:;databaseName=metastore_db;create=true", - "JDBC connect string for a JDBC metastore.\n" + - "To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.\n" + - "For example, jdbc:postgresql://myhost/db?ssl=true for postgres database."), - METASTORE_DBACCESS_SSL_PROPS("hive.metastore.dbaccess.ssl.properties", "", - "Comma-separated SSL properties for metastore to access database when JDO connection URL\n" + - "enables SSL access. e.g. javax.net.ssl.trustStore=/tmp/truststore,javax.net.ssl.trustStorePassword=pwd."), - HMSHANDLERATTEMPTS("hive.hmshandler.retry.attempts", 10, - "The number of times to retry a HMSHandler call if there were a connection error."), - HMSHANDLERINTERVAL("hive.hmshandler.retry.interval", "2000ms", - new TimeValidator(TimeUnit.MILLISECONDS), "The time between HMSHandler retry attempts on failure."), - HMSHANDLERFORCERELOADCONF("hive.hmshandler.force.reload.conf", false, - "Whether to force reloading of the HMSHandler configuration (including\n" + - "the connection URL, before the next metastore query that accesses the\n" + - "datastore. Once reloaded, this value is reset to false. 
Used for\n" + - "testing only."), - METASTORESERVERMAXMESSAGESIZE("hive.metastore.server.max.message.size", 100*1024*1024L, - "Maximum message size in bytes a HMS will accept."), - METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200, - "Minimum number of worker threads in the Thrift server's pool."), - METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", 1000, - "Maximum number of worker threads in the Thrift server's pool."), - METASTORE_TCP_KEEP_ALIVE("hive.metastore.server.tcp.keepalive", true, - "Whether to enable TCP keepalive for the metastore server. Keepalive will prevent accumulation of half-open connections."), - - METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original", - "_INTERMEDIATE_ORIGINAL", - "Intermediate dir suffixes used for archiving. Not important what they\n" + - "are, as long as collisions are avoided"), - METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived", - "_INTERMEDIATE_ARCHIVED", ""), - METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted", - "_INTERMEDIATE_EXTRACTED", ""), - METASTORE_KERBEROS_KEYTAB_FILE("hive.metastore.kerberos.keytab.file", "", - "The path to the Kerberos Keytab file containing the metastore Thrift server's service principal."), - METASTORE_KERBEROS_PRINCIPAL("hive.metastore.kerberos.principal", - "hive-metastore/_HOST@EXAMPLE.COM", - "The service principal for the metastore Thrift server. \n" + - "The special string _HOST will be replaced automatically with the correct host name."), - METASTORE_USE_THRIFT_SASL("hive.metastore.sasl.enabled", false, - "If true, the metastore Thrift interface will be secured with SASL. Clients must authenticate with Kerberos."), - METASTORE_USE_THRIFT_FRAMED_TRANSPORT("hive.metastore.thrift.framed.transport.enabled", false, - "If true, the metastore Thrift interface will use TFramedTransport. When false (default) a standard TTransport is used."), - METASTORE_USE_THRIFT_COMPACT_PROTOCOL("hive.metastore.thrift.compact.protocol.enabled", false, - "If true, the metastore Thrift interface will use TCompactProtocol. When false (default) TBinaryProtocol will be used.\n" + - "Setting it to true will break compatibility with older clients running TBinaryProtocol."), - METASTORE_TOKEN_SIGNATURE("hive.metastore.token.signature", "", - "The delegation token service name to match when selecting a token from the current user's tokens."), - METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_CLS("hive.cluster.delegation.token.store.class", - "org.apache.hadoop.hive.thrift.MemoryTokenStore", - "The delegation token store implementation. Set to org.apache.hadoop.hive.thrift.ZooKeeperTokenStore for load-balanced cluster."), - METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_CONNECTSTR( - "hive.cluster.delegation.token.store.zookeeper.connectString", "", - "The ZooKeeper token store connect string. You can re-use the configuration value\n" + - "set in hive.zookeeper.quorum, by leaving this parameter unset."), - METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ZNODE( - "hive.cluster.delegation.token.store.zookeeper.znode", "/hivedelegation", - "The root path for token store data. Note that this is used by both HiveServer2 and\n" + - "MetaStore to store delegation Token. One directory gets created for each of them.\n" + - "The final directory names would have the servername appended to it (HIVESERVER2,\n" + - "METASTORE)."), - METASTORE_CLUSTER_DELEGATION_TOKEN_STORE_ZK_ACL( - "hive.cluster.delegation.token.store.zookeeper.acl", "", - "ACL for token store entries. 
Comma separated list of ACL entries. For example:\n" + - "sasl:hive/host1@MY.DOMAIN:cdrwa,sasl:hive/host2@MY.DOMAIN:cdrwa\n" + - "Defaults to all permissions for the hiveserver2/metastore process user."), - METASTORE_CACHE_PINOBJTYPES("hive.metastore.cache.pinobjtypes", "Table,StorageDescriptor,SerDeInfo,Partition,Database,Type,FieldSchema,Order", - "List of comma separated metastore object types that should be pinned in the cache"), - METASTORE_CONNECTION_POOLING_TYPE("datanucleus.connectionPoolingType", "HikariCP", new StringSet("BONECP", "DBCP", - "HikariCP", "NONE"), - "Specify connection pool library for datanucleus"), - METASTORE_CONNECTION_POOLING_MAX_CONNECTIONS("datanucleus.connectionPool.maxPoolSize", 10, - "Specify the maximum number of connections in the connection pool. Note: The configured size will be used by\n" + - "2 connection pools (TxnHandler and ObjectStore). When configuring the max connection pool size, it is\n" + - "recommended to take into account the number of metastore instances and the number of HiveServer2 instances\n" + - "configured with embedded metastore. To get optimal performance, set config to meet the following condition\n"+ - "(2 * pool_size * metastore_instances + 2 * pool_size * HS2_instances_with_embedded_metastore) = \n" + - "(2 * physical_core_count + hard_disk_count)."), - // Workaround for DN bug on Postgres: - // http://www.datanucleus.org/servlet/forum/viewthread_thread,7985_offset - METASTORE_DATANUCLEUS_INIT_COL_INFO("datanucleus.rdbms.initializeColumnInfo", "NONE", - "initializeColumnInfo setting for DataNucleus; set to NONE at least on Postgres."), - METASTORE_VALIDATE_TABLES("datanucleus.schema.validateTables", false, - "validates existing schema against code. turn this on if you want to verify existing schema"), - METASTORE_VALIDATE_COLUMNS("datanucleus.schema.validateColumns", false, - "validates existing schema against code. turn this on if you want to verify existing schema"), - METASTORE_VALIDATE_CONSTRAINTS("datanucleus.schema.validateConstraints", false, - "validates existing schema against code. turn this on if you want to verify existing schema"), - METASTORE_STORE_MANAGER_TYPE("datanucleus.storeManagerType", "rdbms", "metadata store type"), - METASTORE_AUTO_CREATE_ALL("datanucleus.schema.autoCreateAll", false, - "Auto creates necessary schema on a startup if one doesn't exist. Set this to false, after creating it once." - + "To enable auto create also set hive.metastore.schema.verification=false. Auto creation is not " - + "recommended for production use cases, run schematool command instead." ), - METASTORE_SCHEMA_VERIFICATION("hive.metastore.schema.verification", true, - "Enforce metastore schema version consistency.\n" + - "True: Verify that version information stored in is compatible with one from Hive jars. Also disable automatic\n" + - " schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures\n" + - " proper metastore schema migration. (Default)\n" + - "False: Warn if the version information stored in metastore doesn't match with one from in Hive jars."), - METASTORE_SCHEMA_VERIFICATION_RECORD_VERSION("hive.metastore.schema.verification.record.version", false, - "When true the current MS version is recorded in the VERSION table. 
If this is disabled and verification is\n" + - " enabled the MS will be unusable."), - METASTORE_SCHEMA_INFO_CLASS("hive.metastore.schema.info.class", - "org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo", - "Fully qualified class name for the metastore schema information class \n" - + "which is used by schematool to fetch the schema information.\n" - + " This class should implement the IMetaStoreSchemaInfo interface"), - METASTORE_TRANSACTION_ISOLATION("datanucleus.transactionIsolation", "read-committed", - "Default transaction isolation level for identity generation."), - METASTORE_CACHE_LEVEL2("datanucleus.cache.level2", false, - "Use a level 2 cache. Turn this off if metadata is changed independently of Hive metastore server"), - METASTORE_CACHE_LEVEL2_TYPE("datanucleus.cache.level2.type", "none", ""), - METASTORE_IDENTIFIER_FACTORY("datanucleus.identifierFactory", "datanucleus1", - "Name of the identifier factory to use when generating table/column names etc. \n" + - "'datanucleus1' is used for backward compatibility with DataNucleus v1"), - METASTORE_USE_LEGACY_VALUE_STRATEGY("datanucleus.rdbms.useLegacyNativeValueStrategy", true, ""), - METASTORE_PLUGIN_REGISTRY_BUNDLE_CHECK("datanucleus.plugin.pluginRegistryBundleCheck", "LOG", - "Defines what happens when plugin bundles are found and are duplicated [EXCEPTION|LOG|NONE]"), - METASTORE_BATCH_RETRIEVE_MAX("hive.metastore.batch.retrieve.max", 300, - "Maximum number of objects (tables/partitions) can be retrieved from metastore in one batch. \n" + - "The higher the number, the less the number of round trips is needed to the Hive metastore server, \n" + - "but it may also cause higher memory requirement at the client side."), - METASTORE_BATCH_RETRIEVE_OBJECTS_MAX( - "hive.metastore.batch.retrieve.table.partition.max", 1000, - "Maximum number of objects that metastore internally retrieves in one batch."), - - METASTORE_INIT_HOOKS("hive.metastore.init.hooks", "", - "A comma separated list of hooks to be invoked at the beginning of HMSHandler initialization. \n" + - "An init hook is specified as the name of Java class which extends org.apache.hadoop.hive.metastore.MetaStoreInitListener."), - METASTORE_PRE_EVENT_LISTENERS("hive.metastore.pre.event.listeners", "", - "List of comma separated listeners for metastore events."), - METASTORE_EVENT_LISTENERS("hive.metastore.event.listeners", "", - "A comma separated list of Java classes that implement the org.apache.hadoop.hive.metastore.MetaStoreEventListener" + - " interface. The metastore event and corresponding listener method will be invoked in separate JDO transactions. " + - "Alternatively, configure hive.metastore.transactional.event.listeners to ensure both are invoked in same JDO transaction."), - METASTORE_TRANSACTIONAL_EVENT_LISTENERS("hive.metastore.transactional.event.listeners", "", - "A comma separated list of Java classes that implement the org.apache.hadoop.hive.metastore.MetaStoreEventListener" + - " interface. 
Both the metastore event and corresponding listener method will be invoked in the same JDO transaction."), - METASTORE_EVENT_DB_LISTENER_TTL("hive.metastore.event.db.listener.timetolive", "86400s", - new TimeValidator(TimeUnit.SECONDS), - "time after which events will be removed from the database listener queue"), - METASTORE_AUTHORIZATION_STORAGE_AUTH_CHECKS("hive.metastore.authorization.storage.checks", false, - "Should the metastore do authorization checks against the underlying storage (usually hdfs) \n" + - "for operations like drop-partition (disallow the drop-partition if the user in\n" + - "question doesn't have permissions to delete the corresponding directory\n" + - "on the storage)."), - METASTORE_AUTHORIZATION_EXTERNALTABLE_DROP_CHECK("hive.metastore.authorization.storage.check.externaltable.drop", true, - "Should StorageBasedAuthorization check permission of the storage before dropping external table.\n" + - "StorageBasedAuthorization already does this check for managed table. For external table however,\n" + - "anyone who has read permission of the directory could drop external table, which is surprising.\n" + - "The flag is set to false by default to maintain backward compatibility."), - METASTORE_EVENT_CLEAN_FREQ("hive.metastore.event.clean.freq", "0s", - new TimeValidator(TimeUnit.SECONDS), - "Frequency at which timer task runs to purge expired events in metastore."), - METASTORE_EVENT_EXPIRY_DURATION("hive.metastore.event.expiry.duration", "0s", - new TimeValidator(TimeUnit.SECONDS), - "Duration after which events expire from events table"), - METASTORE_EVENT_MESSAGE_FACTORY("hive.metastore.event.message.factory", - "org.apache.hadoop.hive.metastore.messaging.json.JSONMessageFactory", - "Factory class for making encoding and decoding messages in the events generated."), - METASTORE_EXECUTE_SET_UGI("hive.metastore.execute.setugi", true, - "In unsecure mode, setting this property to true will cause the metastore to execute DFS operations using \n" + - "the client's reported user and group permissions. Note that this property must be set on \n" + - "both the client and server sides. Further note that its best effort. \n" + - "If client sets its to true and server sets it to false, client setting will be ignored."), - METASTORE_PARTITION_NAME_WHITELIST_PATTERN("hive.metastore.partition.name.whitelist.pattern", "", - "Partition names will be checked against this regex pattern and rejected if not matched."), - - METASTORE_INTEGER_JDO_PUSHDOWN("hive.metastore.integral.jdo.pushdown", false, - "Allow JDO query pushdown for integral partition columns in metastore. Off by default. This\n" + - "improves metastore perf for integral columns, especially if there's a large number of partitions.\n" + - "However, it doesn't work correctly with integral values that are not normalized (e.g. have\n" + - "leading zeroes, like 0012). If metastore direct SQL is enabled and works, this optimization\n" + - "is also irrelevant."), - METASTORE_TRY_DIRECT_SQL("hive.metastore.try.direct.sql", true, - "Whether the Hive metastore should try to use direct SQL queries instead of the\n" + - "DataNucleus for certain read paths. This can improve metastore performance when\n" + - "fetching many partitions or column statistics by orders of magnitude; however, it\n" + - "is not guaranteed to work on all RDBMS-es and all versions. In case of SQL failures,\n" + - "the metastore will fall back to the DataNucleus, so it's safe even if SQL doesn't\n" + - "work for all queries on your datastore. 
If all SQL queries fail (for example, your\n" + - "metastore is backed by MongoDB), you might want to disable this to save the\n" + - "try-and-fall-back cost."), - METASTORE_DIRECT_SQL_PARTITION_BATCH_SIZE("hive.metastore.direct.sql.batch.size", 0, - "Batch size for partition and other object retrieval from the underlying DB in direct\n" + - "SQL. For some DBs like Oracle and MSSQL, there are hardcoded or perf-based limitations\n" + - "that necessitate this. For DBs that can handle the queries, this isn't necessary and\n" + - "may impede performance. -1 means no batching, 0 means automatic batching."), - METASTORE_TRY_DIRECT_SQL_DDL("hive.metastore.try.direct.sql.ddl", true, - "Same as hive.metastore.try.direct.sql, for read statements within a transaction that\n" + - "modifies metastore data. Due to non-standard behavior in Postgres, if a direct SQL\n" + - "select query has incorrect syntax or something similar inside a transaction, the\n" + - "entire transaction will fail and fall-back to DataNucleus will not be possible. You\n" + - "should disable the usage of direct SQL inside transactions if that happens in your case."), - METASTORE_DIRECT_SQL_MAX_QUERY_LENGTH("hive.direct.sql.max.query.length", 100, "The maximum\n" + - " size of a query string (in KB)."), - METASTORE_DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE("hive.direct.sql.max.elements.in.clause", 1000, - "The maximum number of values in a IN clause. Once exceeded, it will be broken into\n" + - " multiple OR separated IN clauses."), - METASTORE_DIRECT_SQL_MAX_ELEMENTS_VALUES_CLAUSE("hive.direct.sql.max.elements.values.clause", - 1000, "The maximum number of values in a VALUES clause for INSERT statement."), - METASTORE_ORM_RETRIEVE_MAPNULLS_AS_EMPTY_STRINGS("hive.metastore.orm.retrieveMapNullsAsEmptyStrings",false, - "Thrift does not support nulls in maps, so any nulls present in maps retrieved from ORM must " + - "either be pruned or converted to empty strings. Some backing dbs such as Oracle persist empty strings " + - "as nulls, so we should set this parameter if we wish to reverse that behaviour. For others, " + - "pruning is the correct behaviour"), - METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES( - "hive.metastore.disallow.incompatible.col.type.changes", true, - "If true (default is false), ALTER TABLE operations which change the type of a\n" + - "column (say STRING) to an incompatible type (say MAP) are disallowed.\n" + - "RCFile default SerDe (ColumnarSerDe) serializes the values in such a way that the\n" + - "datatypes can be converted from string to any type. The map is also serialized as\n" + - "a string, which can be read as a string as well. However, with any binary\n" + - "serialization, this is not true. 
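The direct-SQL switches interact: the batch size, query-length cap, and IN-clause cap only matter once hive.metastore.try.direct.sql is on, which a short probe makes concrete. A rough sketch under the same HiveConf-accessor assumptions as above; names and output formatting are illustrative only.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class DirectSqlSettingsProbe {                 // hypothetical, for illustration only
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    boolean tryDirectSql = conf.getBoolVar(ConfVars.METASTORE_TRY_DIRECT_SQL);
    // 0 = automatic batching, -1 = no batching, otherwise an explicit batch size.
    int batchSize = conf.getIntVar(ConfVars.METASTORE_DIRECT_SQL_PARTITION_BATCH_SIZE);
    // Query-length cap is expressed in KB; IN clauses above the element cap are split into ORs.
    int maxQueryKb = conf.getIntVar(ConfVars.METASTORE_DIRECT_SQL_MAX_QUERY_LENGTH);
    int maxInElements = conf.getIntVar(ConfVars.METASTORE_DIRECT_SQL_MAX_ELEMENTS_IN_CLAUSE);
    System.out.printf("directSql=%b batch=%d maxQueryKb=%d maxIn=%d%n",
        tryDirectSql, batchSize, maxQueryKb, maxInElements);
  }
}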
Blocking the ALTER TABLE prevents ClassCastExceptions\n" + - "when subsequently trying to access old partitions.\n" + - "\n" + - "Primitive types like INT, STRING, BIGINT, etc., are compatible with each other and are\n" + - "not blocked.\n" + - "\n" + - "See HIVE-4409 for more details."), - METASTORE_LIMIT_PARTITION_REQUEST("hive.metastore.limit.partition.request", -1, - "This limits the number of partitions that can be requested from the metastore for a given table.\n" + - "The default value \"-1\" means no limit."), - - NEWTABLEDEFAULTPARA("hive.table.parameters.default", "", - "Default property values for newly created tables"), - DDL_CTL_PARAMETERS_WHITELIST("hive.ddl.createtablelike.properties.whitelist", "", - "Table Properties to copy over when executing a Create Table Like."), - METASTORE_RAW_STORE_IMPL("hive.metastore.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore", - "Name of the class that implements org.apache.hadoop.hive.metastore.rawstore interface. \n" + - "This class is used to store and retrieval of raw metadata objects such as table, database"), - METASTORE_CACHED_RAW_STORE_IMPL("hive.metastore.cached.rawstore.impl", "org.apache.hadoop.hive.metastore.ObjectStore", - "Name of the wrapped RawStore class"), - METASTORE_CACHED_RAW_STORE_CACHE_UPDATE_FREQUENCY( - "hive.metastore.cached.rawstore.cache.update.frequency", "60", new TimeValidator( - TimeUnit.SECONDS), - "The time after which metastore cache is updated from metastore DB."), - METASTORE_TXN_STORE_IMPL("hive.metastore.txn.store.impl", - "org.apache.hadoop.hive.metastore.txn.CompactionTxnHandler", - "Name of class that implements org.apache.hadoop.hive.metastore.txn.TxnStore. This " + - "class is used to store and retrieve transactions and locks"), - METASTORE_CONNECTION_DRIVER("javax.jdo.option.ConnectionDriverName", "org.apache.derby.jdbc.EmbeddedDriver", - "Driver class name for a JDBC metastore"), - METASTORE_MANAGER_FACTORY_CLASS("javax.jdo.PersistenceManagerFactoryClass", - "org.datanucleus.api.jdo.JDOPersistenceManagerFactory", - "class implementing the jdo persistence"), - METASTORE_EXPRESSION_PROXY_CLASS("hive.metastore.expression.proxy", - "org.apache.hadoop.hive.ql.optimizer.ppr.PartitionExpressionForMetastore", ""), - METASTORE_DETACH_ALL_ON_COMMIT("javax.jdo.option.DetachAllOnCommit", true, - "Detaches all objects from session so that they can be used after transaction is committed"), - METASTORE_NON_TRANSACTIONAL_READ("javax.jdo.option.NonTransactionalRead", true, - "Reads outside of transactions"), - METASTORE_CONNECTION_USER_NAME("javax.jdo.option.ConnectionUserName", "APP", - "Username to use against metastore database"), - METASTORE_END_FUNCTION_LISTENERS("hive.metastore.end.function.listeners", "", - "List of comma separated listeners for the end of metastore functions."), - METASTORE_PART_INHERIT_TBL_PROPS("hive.metastore.partition.inherit.table.properties", "", - "List of comma separated keys occurring in table properties which will get inherited to newly created partitions. \n" + - "* implies all the keys will get inherited."), - METASTORE_FILTER_HOOK("hive.metastore.filter.hook", "org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl", - "Metastore hook class for filtering the metadata read results. 
If hive.security.authorization.manager" - + "is set to instance of HiveAuthorizerFactory, then this value is ignored."), - FIRE_EVENTS_FOR_DML("hive.metastore.dml.events", false, "If true, the metastore will be asked" + - " to fire events for DML operations"), - METASTORE_CLIENT_DROP_PARTITIONS_WITH_EXPRESSIONS("hive.metastore.client.drop.partitions.using.expressions", true, - "Choose whether dropping partitions with HCatClient pushes the partition-predicate to the metastore, " + - "or drops partitions iteratively"), - - METASTORE_AGGREGATE_STATS_CACHE_ENABLED("hive.metastore.aggregate.stats.cache.enabled", true, - "Whether aggregate stats caching is enabled or not."), - METASTORE_AGGREGATE_STATS_CACHE_SIZE("hive.metastore.aggregate.stats.cache.size", 10000, - "Maximum number of aggregate stats nodes that we will place in the metastore aggregate stats cache."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS("hive.metastore.aggregate.stats.cache.max.partitions", 10000, - "Maximum number of partitions that are aggregated per cache node."), - METASTORE_AGGREGATE_STATS_CACHE_FPP("hive.metastore.aggregate.stats.cache.fpp", (float) 0.01, - "Maximum false positive probability for the Bloom Filter used in each aggregate stats cache node (default 1%)."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE("hive.metastore.aggregate.stats.cache.max.variance", (float) 0.01, - "Maximum tolerable variance in number of partitions between a cached node and our request (default 1%)."), - METASTORE_AGGREGATE_STATS_CACHE_TTL("hive.metastore.aggregate.stats.cache.ttl", "600s", new TimeValidator(TimeUnit.SECONDS), - "Number of seconds for a cached node to be active in the cache before they become stale."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT("hive.metastore.aggregate.stats.cache.max.writer.wait", "5000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Number of milliseconds a writer will wait to acquire the writelock before giving up."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT("hive.metastore.aggregate.stats.cache.max.reader.wait", "1000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Number of milliseconds a reader will wait to acquire the readlock before giving up."), - METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL("hive.metastore.aggregate.stats.cache.max.full", (float) 0.9, - "Maximum cache full % after which the cache cleaner thread kicks in."), - METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL("hive.metastore.aggregate.stats.cache.clean.until", (float) 0.8, - "The cleaner thread cleans until cache reaches this % full size."), - METASTORE_METRICS("hive.metastore.metrics.enabled", false, "Enable metrics on the metastore."), - METASTORE_INIT_METADATA_COUNT_ENABLED("hive.metastore.initial.metadata.count.enabled", true, - "Enable a metadata count at metastore startup for metrics."), - - // Metastore SSL settings - HIVE_METASTORE_USE_SSL("hive.metastore.use.SSL", false, - "Set this to true for using SSL encryption in HMS server."), - HIVE_METASTORE_SSL_KEYSTORE_PATH("hive.metastore.keystore.path", "", - "Metastore SSL certificate keystore location."), - HIVE_METASTORE_SSL_KEYSTORE_PASSWORD("hive.metastore.keystore.password", "", - "Metastore SSL certificate keystore password."), - HIVE_METASTORE_SSL_TRUSTSTORE_PATH("hive.metastore.truststore.path", "", - "Metastore SSL certificate truststore location."), - HIVE_METASTORE_SSL_TRUSTSTORE_PASSWORD("hive.metastore.truststore.password", "", - "Metastore SSL certificate truststore password."), - - // Parameters for exporting metadata on table drop 
(requires the use of the) - // org.apache.hadoop.hive.ql.parse.MetaDataExportListener preevent listener - METADATA_EXPORT_LOCATION("hive.metadata.export.location", "", - "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" + - "it is the location to which the metadata will be exported. The default is an empty string, which results in the \n" + - "metadata being exported to the current user's home directory on HDFS."), - MOVE_EXPORTED_METADATA_TO_TRASH("hive.metadata.move.exported.metadata.to.trash", true, - "When used in conjunction with the org.apache.hadoop.hive.ql.parse.MetaDataExportListener pre event listener, \n" + - "this setting determines if the metadata that is exported will subsequently be moved to the user's trash directory \n" + - "alongside the dropped table data. This ensures that the metadata will be cleaned up along with the dropped table data."), - - // CLI - CLIIGNOREERRORS("hive.cli.errors.ignore", false, ""), - CLIPRINTCURRENTDB("hive.cli.print.current.db", false, - "Whether to include the current database in the Hive prompt."), - CLIPROMPT("hive.cli.prompt", "hive", - "Command line prompt configuration value. Other hiveconf can be used in this configuration value. \n" + - "Variable substitution will only be invoked at the Hive CLI startup."), - CLIPRETTYOUTPUTNUMCOLS("hive.cli.pretty.output.num.cols", -1, - "The number of columns to use when formatting output generated by the DESCRIBE PRETTY table_name command.\n" + - "If the value of this property is -1, then Hive will use the auto-detected terminal width."), - - HIVE_METASTORE_FS_HANDLER_CLS("hive.metastore.fs.handler.class", "org.apache.hadoop.hive.metastore.HiveMetaStoreFsImpl", ""), - - // Things we log in the jobconf - - // session identifier - HIVESESSIONID("hive.session.id", "", ""), - // whether session is running in silent mode or not - HIVESESSIONSILENT("hive.session.silent", false, ""), - - HIVE_SESSION_HISTORY_ENABLED("hive.session.history.enabled", false, - "Whether to log Hive query, query plan, runtime statistics etc."), - - HIVEQUERYSTRING("hive.query.string", "", - "Query being executed (might be multiple per a session)"), - - HIVEQUERYID("hive.query.id", "", - "ID for query being executed (might be multiple per a session)"), - - HIVEJOBNAMELENGTH("hive.jobname.length", 50, "max jobname length"), - - // hive jar - HIVEJAR("hive.jar.path", "", - "The location of hive_cli.jar that is used when submitting jobs in a separate jvm."), - HIVEAUXJARS("hive.aux.jars.path", "", - "The location of the plugin jars that contain implementations of user defined functions and serdes."), - - // reloadable jars - HIVERELOADABLEJARS("hive.reloadable.aux.jars.path", "", - "The locations of the plugin jars, which can be a comma-separated folders or jars. Jars can be renewed\n" - + "by executing reload command. And these jars can be " - + "used as the auxiliary classes like creating a UDF or SerDe."), - - // hive added files and jars - HIVEADDEDFILES("hive.added.files.path", "", "This an internal parameter."), - HIVEADDEDJARS("hive.added.jars.path", "", "This an internal parameter."), - HIVEADDEDARCHIVES("hive.added.archives.path", "", "This an internal parameter."), - - HIVE_CURRENT_DATABASE("hive.current.database", "", "Database name used by current session. 
Internal usage only.", true), - - // for hive script operator - HIVES_AUTO_PROGRESS_TIMEOUT("hive.auto.progress.timeout", "0s", - new TimeValidator(TimeUnit.SECONDS), - "How long to run autoprogressor for the script/UDTF operators.\n" + - "Set to 0 for forever."), - HIVESCRIPTAUTOPROGRESS("hive.script.auto.progress", false, - "Whether Hive Transform/Map/Reduce Clause should automatically send progress information to TaskTracker \n" + - "to avoid the task getting killed because of inactivity. Hive sends progress information when the script is \n" + - "outputting to stderr. This option removes the need of periodically producing stderr messages, \n" + - "but users should be cautious because this may prevent infinite loops in the scripts to be killed by TaskTracker."), - HIVESCRIPTIDENVVAR("hive.script.operator.id.env.var", "HIVE_SCRIPT_OPERATOR_ID", - "Name of the environment variable that holds the unique script operator ID in the user's \n" + - "transform function (the custom mapper/reducer that the user has specified in the query)"), - HIVESCRIPTTRUNCATEENV("hive.script.operator.truncate.env", false, - "Truncate each environment variable for external script in scripts operator to 20KB (to fit system limits)"), - HIVESCRIPT_ENV_BLACKLIST("hive.script.operator.env.blacklist", - "hive.txn.valid.txns,hive.script.operator.env.blacklist", - "Comma separated list of keys from the configuration file not to convert to environment " + - "variables when envoking the script operator"), - HIVE_STRICT_CHECKS_LARGE_QUERY("hive.strict.checks.large.query", false, - "Enabling strict large query checks disallows the following:\n" + - " Orderby without limit.\n" + - " No partition being picked up for a query against partitioned table.\n" + - "Note that these checks currently do not consider data size, only the query pattern."), - HIVE_STRICT_CHECKS_TYPE_SAFETY("hive.strict.checks.type.safety", true, - "Enabling strict type safety checks disallows the following:\n" + - " Comparing bigints and strings.\n" + - " Comparing bigints and doubles."), - HIVE_STRICT_CHECKS_CARTESIAN("hive.strict.checks.cartesian.product", true, - "Enabling strict Cartesian join checks disallows the following:\n" + - " Cartesian product (cross join)."), - HIVE_STRICT_CHECKS_BUCKETING("hive.strict.checks.bucketing", true, - "Enabling strict bucketing checks disallows the following:\n" + - " Load into bucketed tables."), - - @Deprecated - HIVEMAPREDMODE("hive.mapred.mode", null, - "Deprecated; use hive.strict.checks.* settings instead."), - HIVEALIAS("hive.alias", "", ""), - HIVEMAPSIDEAGGREGATE("hive.map.aggr", true, "Whether to use map-side aggregation in Hive Group By queries"), - HIVEGROUPBYSKEW("hive.groupby.skewindata", false, "Whether there is skew in data to optimize group by queries"), - HIVEJOINEMITINTERVAL("hive.join.emit.interval", 1000, - "How many rows in the right-most join operand Hive should buffer before emitting the join result."), - HIVEJOINCACHESIZE("hive.join.cache.size", 25000, - "How many rows in the joining tables (except the streaming table) should be cached in memory."), - HIVE_PUSH_RESIDUAL_INNER("hive.join.inner.residual", false, - "Whether to push non-equi filter predicates within inner joins. This can improve efficiency in " - + "the evaluation of certain joins, since we will not be emitting rows which are thrown away by " - + "a Filter operator straight away. 
However, currently vectorization does not support them, thus " - + "enabling it is only recommended when vectorization is disabled."), - - // CBO related - HIVE_CBO_ENABLED("hive.cbo.enable", true, "Flag to control enabling Cost Based Optimizations using Calcite framework."), - HIVE_CBO_CNF_NODES_LIMIT("hive.cbo.cnf.maxnodes", -1, "When converting to conjunctive normal form (CNF), fail if" + - "the expression exceeds this threshold; the threshold is expressed in terms of number of nodes (leaves and" + - "interior nodes). -1 to not set up a threshold."), - HIVE_CBO_RETPATH_HIVEOP("hive.cbo.returnpath.hiveop", false, "Flag to control calcite plan to hive operator conversion"), - HIVE_CBO_EXTENDED_COST_MODEL("hive.cbo.costmodel.extended", false, "Flag to control enabling the extended cost model based on" - + "CPU, IO and cardinality. Otherwise, the cost model is based on cardinality."), - HIVE_CBO_COST_MODEL_CPU("hive.cbo.costmodel.cpu", "0.000001", "Default cost of a comparison"), - HIVE_CBO_COST_MODEL_NET("hive.cbo.costmodel.network", "150.0", "Default cost of a transfering a byte over network;" - + " expressed as multiple of CPU cost"), - HIVE_CBO_COST_MODEL_LFS_WRITE("hive.cbo.costmodel.local.fs.write", "4.0", "Default cost of writing a byte to local FS;" - + " expressed as multiple of NETWORK cost"), - HIVE_CBO_COST_MODEL_LFS_READ("hive.cbo.costmodel.local.fs.read", "4.0", "Default cost of reading a byte from local FS;" - + " expressed as multiple of NETWORK cost"), - HIVE_CBO_COST_MODEL_HDFS_WRITE("hive.cbo.costmodel.hdfs.write", "10.0", "Default cost of writing a byte to HDFS;" - + " expressed as multiple of Local FS write cost"), - HIVE_CBO_COST_MODEL_HDFS_READ("hive.cbo.costmodel.hdfs.read", "1.5", "Default cost of reading a byte from HDFS;" - + " expressed as multiple of Local FS read cost"), - HIVE_CBO_SHOW_WARNINGS("hive.cbo.show.warnings", true, - "Toggle display of CBO warnings like missing column stats"), - AGGR_JOIN_TRANSPOSE("hive.transpose.aggr.join", false, "push aggregates through join"), - SEMIJOIN_CONVERSION("hive.optimize.semijoin.conversion", true, "convert group by followed by inner equi join into semijoin"), - HIVE_COLUMN_ALIGNMENT("hive.order.columnalignment", true, "Flag to control whether we want to try to align" + - "columns in operators such as Aggregate or Join so that we try to reduce the number of shuffling stages"), - - // materialized views - HIVE_MATERIALIZED_VIEW_ENABLE_AUTO_REWRITING("hive.materializedview.rewriting", false, - "Whether to try to rewrite queries using the materialized views enabled for rewriting"), - HIVE_MATERIALIZED_VIEW_FILE_FORMAT("hive.materializedview.fileformat", "ORC", - new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC"), - "Default file format for CREATE MATERIALIZED VIEW statement"), - HIVE_MATERIALIZED_VIEW_SERDE("hive.materializedview.serde", - "org.apache.hadoop.hive.ql.io.orc.OrcSerde", "Default SerDe used for materialized views"), - - // hive.mapjoin.bucket.cache.size has been replaced by hive.smbjoin.cache.row, - // need to remove by hive .13. 
Also, do not change default (see SMB operator) - HIVEMAPJOINBUCKETCACHESIZE("hive.mapjoin.bucket.cache.size", 100, ""), - - HIVEMAPJOINUSEOPTIMIZEDTABLE("hive.mapjoin.optimized.hashtable", true, - "Whether Hive should use memory-optimized hash table for MapJoin.\n" + - "Only works on Tez and Spark, because memory-optimized hashtable cannot be serialized."), - HIVEMAPJOINOPTIMIZEDTABLEPROBEPERCENT("hive.mapjoin.optimized.hashtable.probe.percent", - (float) 0.5, "Probing space percentage of the optimized hashtable"), - HIVEUSEHYBRIDGRACEHASHJOIN("hive.mapjoin.hybridgrace.hashtable", true, "Whether to use hybrid" + - "grace hash join as the join method for mapjoin. Tez only."), - HIVEHYBRIDGRACEHASHJOINMEMCHECKFREQ("hive.mapjoin.hybridgrace.memcheckfrequency", 1024, "For " + - "hybrid grace hash join, how often (how many rows apart) we check if memory is full. " + - "This number should be power of 2."), - HIVEHYBRIDGRACEHASHJOINMINWBSIZE("hive.mapjoin.hybridgrace.minwbsize", 524288, "For hybrid grace" + - "Hash join, the minimum write buffer size used by optimized hashtable. Default is 512 KB."), - HIVEHYBRIDGRACEHASHJOINMINNUMPARTITIONS("hive.mapjoin.hybridgrace.minnumpartitions", 16, "For" + - "Hybrid grace hash join, the minimum number of partitions to create."), - HIVEHASHTABLEWBSIZE("hive.mapjoin.optimized.hashtable.wbsize", 8 * 1024 * 1024, - "Optimized hashtable (see hive.mapjoin.optimized.hashtable) uses a chain of buffers to\n" + - "store data. This is one buffer size. HT may be slightly faster if this is larger, but for small\n" + - "joins unnecessary memory will be allocated and then trimmed."), - HIVEHYBRIDGRACEHASHJOINBLOOMFILTER("hive.mapjoin.hybridgrace.bloomfilter", true, "Whether to " + - "use BloomFilter in Hybrid grace hash join to minimize unnecessary spilling."), - - HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000, - "How many rows with the same key value should be cached in memory per smb joined table."), - HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000, - "Number of rows after which size of the grouping keys/aggregation classes is performed"), - HIVEMAPAGGRHASHMEMORY("hive.map.aggr.hash.percentmemory", (float) 0.5, - "Portion of total memory to be used by map-side group aggregation hash table"), - HIVEMAPJOINFOLLOWEDBYMAPAGGRHASHMEMORY("hive.mapjoin.followby.map.aggr.hash.percentmemory", (float) 0.3, - "Portion of total memory to be used by map-side group aggregation hash table, when this group by is followed by map join"), - HIVEMAPAGGRMEMORYTHRESHOLD("hive.map.aggr.hash.force.flush.memory.threshold", (float) 0.9, - "The max memory to be used by map-side group aggregation hash table.\n" + - "If the memory usage is higher than this number, force to flush data"), - HIVEMAPAGGRHASHMINREDUCTION("hive.map.aggr.hash.min.reduction", (float) 0.5, - "Hash aggregation will be turned off if the ratio between hash table size and input rows is bigger than this number. \n" + - "Set to 1 to make sure hash aggregation is never turned off."), - HIVEMULTIGROUPBYSINGLEREDUCER("hive.multigroupby.singlereducer", true, - "Whether to optimize multi group by query to generate single M/R job plan. If the multi group by query has \n" + - "common group by keys, it will be optimized to generate single M/R job."), - HIVE_MAP_GROUPBY_SORT("hive.map.groupby.sorted", true, - "If the bucketing/sorting properties of the table exactly match the grouping key, whether to perform \n" + - "the group by in the mapper by using BucketizedHiveInputFormat. 
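The optimized and hybrid grace hash-table knobs above mix boolean, size, and ratio values; reading them back shows which accessor each type uses. Illustrative only, assuming the standard getBoolVar/getIntVar/getFloatVar accessors; defaults in the comments come from the definitions above.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class MapJoinHashtableSettingsProbe {          // hypothetical, for illustration only
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    boolean optimizedHt  = conf.getBoolVar(ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE);     // Tez/Spark only
    int writeBufferBytes = conf.getIntVar(ConfVars.HIVEHASHTABLEWBSIZE);               // default 8 MB
    int minHybridWb      = conf.getIntVar(ConfVars.HIVEHYBRIDGRACEHASHJOINMINWBSIZE);  // default 512 KB
    float mapAggrMemory  = conf.getFloatVar(ConfVars.HIVEMAPAGGRHASHMEMORY);           // default 0.5
    System.out.printf("optimizedHT=%b wb=%d minHybridWb=%d mapAggrMem=%.2f%n",
        optimizedHt, writeBufferBytes, minHybridWb, mapAggrMemory);
  }
}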
The only downside to this\n" + - "is that it limits the number of mappers to the number of files."), - HIVE_GROUPBY_POSITION_ALIAS("hive.groupby.position.alias", false, - "Whether to enable using Column Position Alias in Group By"), - HIVE_ORDERBY_POSITION_ALIAS("hive.orderby.position.alias", true, - "Whether to enable using Column Position Alias in Order By"), - @Deprecated - HIVE_GROUPBY_ORDERBY_POSITION_ALIAS("hive.groupby.orderby.position.alias", false, - "Whether to enable using Column Position Alias in Group By or Order By (deprecated).\n" + - "Use " + HIVE_ORDERBY_POSITION_ALIAS.varname + " or " + HIVE_GROUPBY_POSITION_ALIAS.varname + " instead"), - HIVE_NEW_JOB_GROUPING_SET_CARDINALITY("hive.new.job.grouping.set.cardinality", 30, - "Whether a new map-reduce job should be launched for grouping sets/rollups/cubes.\n" + - "For a query like: select a, b, c, count(1) from T group by a, b, c with rollup;\n" + - "4 rows are created per row: (a, b, c), (a, b, null), (a, null, null), (null, null, null).\n" + - "This can lead to explosion across map-reduce boundary if the cardinality of T is very high,\n" + - "and map-side aggregation does not do a very good job. \n" + - "\n" + - "This parameter decides if Hive should add an additional map-reduce job. If the grouping set\n" + - "cardinality (4 in the example above), is more than this value, a new MR job is added under the\n" + - "assumption that the original group by will reduce the data size."), - HIVE_GROUPBY_LIMIT_EXTRASTEP("hive.groupby.limit.extrastep", true, "This parameter decides if Hive should \n" + - "create new MR job for sorting final output"), - - // Max file num and size used to do a single copy (after that, distcp is used) - HIVE_EXEC_COPYFILE_MAXNUMFILES("hive.exec.copyfile.maxnumfiles", 1L, - "Maximum number of files Hive uses to do sequential HDFS copies between directories." + - "Distributed copies (distcp) will be used instead for larger numbers of files so that copies can be done faster."), - HIVE_EXEC_COPYFILE_MAXSIZE("hive.exec.copyfile.maxsize", 32L * 1024 * 1024 /*32M*/, - "Maximum file size (in bytes) that Hive uses to do single HDFS copies between directories." + - "Distributed copies (distcp) will be used instead for bigger files so that copies can be done faster."), - - // for hive udtf operator - HIVEUDTFAUTOPROGRESS("hive.udtf.auto.progress", false, - "Whether Hive should automatically send progress information to TaskTracker \n" + - "when using UDTF's to prevent the task getting killed because of inactivity. Users should be cautious \n" + - "because this may prevent TaskTracker from killing tasks with infinite loops."), - - HIVEDEFAULTFILEFORMAT("hive.default.fileformat", "TextFile", new StringSet("TextFile", "SequenceFile", "RCfile", "ORC", "parquet"), - "Default file format for CREATE TABLE statement. Users can explicitly override it by CREATE TABLE ... STORED AS [FORMAT]"), - HIVEDEFAULTMANAGEDFILEFORMAT("hive.default.fileformat.managed", "none", - new StringSet("none", "TextFile", "SequenceFile", "RCfile", "ORC", "parquet"), - "Default file format for CREATE TABLE statement applied to managed tables only. External tables will be \n" + - "created with format specified by hive.default.fileformat. 
Leaving this null will result in using hive.default.fileformat \n" + - "for all tables."), - HIVEQUERYRESULTFILEFORMAT("hive.query.result.fileformat", "SequenceFile", new StringSet("TextFile", "SequenceFile", "RCfile", "Llap"), - "Default file format for storing result of the query."), - HIVECHECKFILEFORMAT("hive.fileformat.check", true, "Whether to check file format or not when loading data files"), - - // default serde for rcfile - HIVEDEFAULTRCFILESERDE("hive.default.rcfile.serde", - "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe", - "The default SerDe Hive will use for the RCFile format"), - - HIVEDEFAULTSERDE("hive.default.serde", - "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", - "The default SerDe Hive will use for storage formats that do not specify a SerDe."), - - SERDESUSINGMETASTOREFORSCHEMA("hive.serdes.using.metastore.for.schema", - "org.apache.hadoop.hive.ql.io.orc.OrcSerde," + - "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe," + - "org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe," + - "org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe," + - "org.apache.hadoop.hive.serde2.MetadataTypedColumnsetSerDe," + - "org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe," + - "org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe," + - "org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe", - "SerDes retrieving schema from metastore. This is an internal parameter."), - - HIVEHISTORYFILELOC("hive.querylog.location", - "${system:java.io.tmpdir}" + File.separator + "${system:user.name}", - "Location of Hive run time structured log file"), - - HIVE_LOG_INCREMENTAL_PLAN_PROGRESS("hive.querylog.enable.plan.progress", true, - "Whether to log the plan's progress every time a job's progress is checked.\n" + - "These logs are written to the location specified by hive.querylog.location"), - - HIVE_LOG_INCREMENTAL_PLAN_PROGRESS_INTERVAL("hive.querylog.plan.progress.interval", "60000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "The interval to wait between logging the plan's progress.\n" + - "If there is a whole number percentage change in the progress of the mappers or the reducers,\n" + - "the progress is logged regardless of this value.\n" + - "The actual interval will be the ceiling of (this value divided by the value of\n" + - "hive.exec.counters.pull.interval) multiplied by the value of hive.exec.counters.pull.interval\n" + - "I.e. if it is not divide evenly by the value of hive.exec.counters.pull.interval it will be\n" + - "logged less frequently than specified.\n" + - "This only has an effect if hive.querylog.enable.plan.progress is set to true."), - - HIVESCRIPTSERDE("hive.script.serde", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe", - "The default SerDe for transmitting input data to and reading output data from the user scripts. "), - HIVESCRIPTRECORDREADER("hive.script.recordreader", - "org.apache.hadoop.hive.ql.exec.TextRecordReader", - "The default record reader for reading data from the user scripts. "), - HIVESCRIPTRECORDWRITER("hive.script.recordwriter", - "org.apache.hadoop.hive.ql.exec.TextRecordWriter", - "The default record writer for writing data to the user scripts. "), - HIVESCRIPTESCAPE("hive.transform.escape.input", false, - "This adds an option to escape special chars (newlines, carriage returns and\n" + - "tabs) when they are passed to the user script. 
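hive.default.fileformat and hive.default.fileformat.managed split the default between external and managed tables, which is easier to see in code. A sketch with an invented class name; whether setVar enforces the declared StringSet is not claimed here.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class DefaultFileFormatExample {               // hypothetical, for illustration only
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Both entries are declared with a StringSet, so only the listed formats are valid values.
    System.out.println("default: " + conf.getVar(ConfVars.HIVEDEFAULTFILEFORMAT));        // TextFile
    System.out.println("managed: " + conf.getVar(ConfVars.HIVEDEFAULTMANAGEDFILEFORMAT)); // none
    // Make managed tables default to ORC while external tables keep hive.default.fileformat.
    conf.setVar(ConfVars.HIVEDEFAULTMANAGEDFILEFORMAT, "ORC");
    // CLI equivalent:  SET hive.default.fileformat.managed=ORC;
  }
}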
This is useful if the Hive tables\n" + - "can contain data that contains special characters."), - HIVEBINARYRECORDMAX("hive.binary.record.max.length", 1000, - "Read from a binary stream and treat each hive.binary.record.max.length bytes as a record. \n" + - "The last record before the end of stream can have less than hive.binary.record.max.length bytes"), - - HIVEHADOOPMAXMEM("hive.mapred.local.mem", 0, "mapper/reducer memory in local mode"), - - //small table file size - HIVESMALLTABLESFILESIZE("hive.mapjoin.smalltable.filesize", 25000000L, - "The threshold for the input file size of the small tables; if the file size is smaller \n" + - "than this threshold, it will try to convert the common join into map join"), - - - HIVE_SCHEMA_EVOLUTION("hive.exec.schema.evolution", true, - "Use schema evolution to convert self-describing file format's data to the schema desired by the reader."), - - HIVE_TRANSACTIONAL_TABLE_SCAN("hive.transactional.table.scan", false, - "internal usage only -- do transaction (ACID) table scan.", true), - - HIVE_TRANSACTIONAL_NUM_EVENTS_IN_MEMORY("hive.transactional.events.mem", 10000000, - "Vectorized ACID readers can often load all the delete events from all the delete deltas\n" - + "into memory to optimize for performance. To prevent out-of-memory errors, this is a rough heuristic\n" - + "that limits the total number of delete events that can be loaded into memory at once.\n" - + "Roughly it has been set to 10 million delete events per bucket (~160 MB).\n"), - - HIVESAMPLERANDOMNUM("hive.sample.seednumber", 0, - "A number used to percentage sampling. By changing this number, user will change the subsets of data sampled."), - - // test mode in hive mode - HIVETESTMODE("hive.test.mode", false, - "Whether Hive is running in test mode. If yes, it turns on sampling and prefixes the output tablename.", - false), - HIVETESTMODEPREFIX("hive.test.mode.prefix", "test_", - "In test mode, specfies prefixes for the output table", false), - HIVETESTMODESAMPLEFREQ("hive.test.mode.samplefreq", 32, - "In test mode, specfies sampling frequency for table, which is not bucketed,\n" + - "For example, the following query:\n" + - " INSERT OVERWRITE TABLE dest SELECT col1 from src\n" + - "would be converted to\n" + - " INSERT OVERWRITE TABLE test_dest\n" + - " SELECT col1 from src TABLESAMPLE (BUCKET 1 out of 32 on rand(1))", false), - HIVETESTMODENOSAMPLE("hive.test.mode.nosamplelist", "", - "In test mode, specifies comma separated table names which would not apply sampling", false), - HIVETESTMODEDUMMYSTATAGGR("hive.test.dummystats.aggregator", "", "internal variable for test", false), - HIVETESTMODEDUMMYSTATPUB("hive.test.dummystats.publisher", "", "internal variable for test", false), - HIVETESTCURRENTTIMESTAMP("hive.test.currenttimestamp", null, "current timestamp for test", false), - HIVETESTMODEROLLBACKTXN("hive.test.rollbacktxn", false, "For testing only. Will mark every ACID transaction aborted", false), - HIVETESTMODEFAILCOMPACTION("hive.test.fail.compaction", false, "For testing only. Will cause CompactorMR to fail.", false), - HIVETESTMODEFAILHEARTBEATER("hive.test.fail.heartbeater", false, "For testing only. 
Will cause Heartbeater to fail.", false), - - HIVEMERGEMAPFILES("hive.merge.mapfiles", true, - "Merge small files at the end of a map-only job"), - HIVEMERGEMAPREDFILES("hive.merge.mapredfiles", false, - "Merge small files at the end of a map-reduce job"), - HIVEMERGETEZFILES("hive.merge.tezfiles", false, "Merge small files at the end of a Tez DAG"), - HIVEMERGESPARKFILES("hive.merge.sparkfiles", false, "Merge small files at the end of a Spark DAG Transformation"), - HIVEMERGEMAPFILESSIZE("hive.merge.size.per.task", (long) (256 * 1000 * 1000), - "Size of merged files at the end of the job"), - HIVEMERGEMAPFILESAVGSIZE("hive.merge.smallfiles.avgsize", (long) (16 * 1000 * 1000), - "When the average output file size of a job is less than this number, Hive will start an additional \n" + - "map-reduce job to merge the output files into bigger files. This is only done for map-only jobs \n" + - "if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true."), - HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true, ""), - HIVEMERGEORCFILESTRIPELEVEL("hive.merge.orcfile.stripe.level", true, - "When hive.merge.mapfiles, hive.merge.mapredfiles or hive.merge.tezfiles is enabled\n" + - "while writing a table with ORC file format, enabling this config will do stripe-level\n" + - "fast merge for small ORC files. Note that enabling this config will not honor the\n" + - "padding tolerance config (hive.exec.orc.block.padding.tolerance)."), - - HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true, - "If this is set the header for RCFiles will simply be RCF. If this is not\n" + - "set the header will be that borrowed from sequence files, e.g. SEQ- followed\n" + - "by the input and output RCFile formats."), - HIVEUSERCFILESYNCCACHE("hive.exec.rcfile.use.sync.cache", true, ""), - - HIVE_RCFILE_RECORD_INTERVAL("hive.io.rcfile.record.interval", Integer.MAX_VALUE, ""), - HIVE_RCFILE_COLUMN_NUMBER_CONF("hive.io.rcfile.column.number.conf", 0, ""), - HIVE_RCFILE_TOLERATE_CORRUPTIONS("hive.io.rcfile.tolerate.corruptions", false, ""), - HIVE_RCFILE_RECORD_BUFFER_SIZE("hive.io.rcfile.record.buffer.size", 4194304, ""), // 4M - - PARQUET_MEMORY_POOL_RATIO("parquet.memory.pool.ratio", 0.5f, - "Maximum fraction of heap that can be used by Parquet file writers in one task.\n" + - "It is for avoiding OutOfMemory error in tasks. Work with Parquet 1.6.0 and above.\n" + - "This config parameter is defined in Parquet, so that it does not start with 'hive.'."), - @Deprecated - HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION("hive.parquet.timestamp.skip.conversion", true, - "Current Hive implementation of parquet stores timestamps to UTC, this flag allows skipping of the conversion" + - "on reading parquet files from other tools"), - HIVE_PARQUET_INT96_DEFAULT_UTC_WRITE_ZONE("hive.parquet.mr.int96.enable.utc.write.zone", false, - "Enable this variable to use UTC as the default timezone for new Parquet tables."), - HIVE_INT_TIMESTAMP_CONVERSION_IN_SECONDS("hive.int.timestamp.conversion.in.seconds", false, - "Boolean/tinyint/smallint/int/bigint value is interpreted as milliseconds during the timestamp conversion.\n" + - "Set this flag to true to interpret the value as seconds to be consistent with float/double." 
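The small-file merge decision described above compares a job's average output file size against hive.merge.smallfiles.avgsize. A simplified sketch of that comparison; the file counts and sizes are made up, and the real planner also considers hive.merge.mapredfiles and the execution engine.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class SmallFileMergeCheck {                    // hypothetical, simplified decision logic
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    long avgSizeThreshold = conf.getLongVar(ConfVars.HIVEMERGEMAPFILESAVGSIZE); // default 16,000,000
    boolean mergeMapOnly  = conf.getBoolVar(ConfVars.HIVEMERGEMAPFILES);        // default true
    long totalBytes = 40_000_000L;   // made-up job output: 10 files of ~4 MB each
    int  numFiles   = 10;
    long avgBytes   = totalBytes / numFiles;
    // Hive launches the extra merge job only when the average output file is below the threshold.
    boolean wouldMerge = mergeMapOnly && avgBytes < avgSizeThreshold;
    System.out.println("average=" + avgBytes + " bytes, merge job triggered: " + wouldMerge);
  }
}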
), - - HIVE_ORC_BASE_DELTA_RATIO("hive.exec.orc.base.delta.ratio", 8, "The ratio of base writer and\n" + - "delta writer in terms of STRIPE_SIZE and BUFFER_SIZE."), - HIVE_ORC_SPLIT_STRATEGY("hive.exec.orc.split.strategy", "HYBRID", new StringSet("HYBRID", "BI", "ETL"), - "This is not a user level config. BI strategy is used when the requirement is to spend less time in split generation" + - " as opposed to query execution (split generation does not read or cache file footers)." + - " ETL strategy is used when spending little more time in split generation is acceptable" + - " (split generation reads and caches file footers). HYBRID chooses between the above strategies" + - " based on heuristics."), - - HIVE_ORC_MS_FOOTER_CACHE_ENABLED("hive.orc.splits.ms.footer.cache.enabled", false, - "Whether to enable using file metadata cache in metastore for ORC file footers."), - HIVE_ORC_MS_FOOTER_CACHE_PPD("hive.orc.splits.ms.footer.cache.ppd.enabled", true, - "Whether to enable file footer cache PPD (hive.orc.splits.ms.footer.cache.enabled\n" + - "must also be set to true for this to work)."), - - HIVE_ORC_INCLUDE_FILE_FOOTER_IN_SPLITS("hive.orc.splits.include.file.footer", false, - "If turned on splits generated by orc will include metadata about the stripes in the file. This\n" + - "data is read remotely (from the client or HS2 machine) and sent to all the tasks."), - HIVE_ORC_SPLIT_DIRECTORY_BATCH_MS("hive.orc.splits.directory.batch.ms", 0, - "How long, in ms, to wait to batch input directories for processing during ORC split\n" + - "generation. 0 means process directories individually. This can increase the number of\n" + - "metastore calls if metastore metadata cache is used."), - HIVE_ORC_INCLUDE_FILE_ID_IN_SPLITS("hive.orc.splits.include.fileid", true, - "Include file ID in splits on file systems that support it."), - HIVE_ORC_ALLOW_SYNTHETIC_FILE_ID_IN_SPLITS("hive.orc.splits.allow.synthetic.fileid", true, - "Allow synthetic file ID in splits on file systems that don't have a native one."), - HIVE_ORC_CACHE_STRIPE_DETAILS_MEMORY_SIZE("hive.orc.cache.stripe.details.mem.size", "256Mb", - new SizeValidator(), "Maximum size of orc splits cached in the client."), - HIVE_ORC_COMPUTE_SPLITS_NUM_THREADS("hive.orc.compute.splits.num.threads", 10, - "How many threads orc should use to create splits in parallel."), - HIVE_ORC_CACHE_USE_SOFT_REFERENCES("hive.orc.cache.use.soft.references", false, - "By default, the cache that ORC input format uses to store orc file footer use hard\n" + - "references for the cached object. Setting this to true can help avoid out of memory\n" + - "issues under memory pressure (in some cases) at the cost of slight unpredictability in\n" + - "overall query performance."), - - HIVE_LAZYSIMPLE_EXTENDED_BOOLEAN_LITERAL("hive.lazysimple.extended_boolean_literal", false, - "LazySimpleSerde uses this property to determine if it treats 'T', 't', 'F', 'f',\n" + - "'1', and '0' as extened, legal boolean literal, in addition to 'TRUE' and 'FALSE'.\n" + - "The default is false, which means only 'TRUE' and 'FALSE' are treated as legal\n" + - "boolean literal."), - - HIVESKEWJOIN("hive.optimize.skewjoin", false, - "Whether to enable skew join optimization. \n" + - "The algorithm is as follows: At runtime, detect the keys with a large skew. Instead of\n" + - "processing those keys, store them temporarily in an HDFS directory. In a follow-up map-reduce\n" + - "job, process those skewed keys. 
The same key need not be skewed for all the tables, and so,\n" + - "the follow-up map-reduce job (for the skewed keys) would be much faster, since it would be a\n" + - "map-join."), - HIVEDYNAMICPARTITIONHASHJOIN("hive.optimize.dynamic.partition.hashjoin", false, - "Whether to enable dynamically partitioned hash join optimization. \n" + - "This setting is also dependent on enabling hive.auto.convert.join"), - HIVECONVERTJOIN("hive.auto.convert.join", true, - "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size"), - HIVECONVERTJOINNOCONDITIONALTASK("hive.auto.convert.join.noconditionaltask", true, - "Whether Hive enables the optimization about converting common join into mapjoin based on the input file size. \n" + - "If this parameter is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than the\n" + - "specified size, the join is directly converted to a mapjoin (there is no conditional task)."), - - HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD("hive.auto.convert.join.noconditionaltask.size", - 10000000L, - "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" + - "However, if it is on, and the sum of size for n-1 of the tables/partitions for a n-way join is smaller than this size, \n" + - "the join is directly converted to a mapjoin(there is no conditional task). The default is 10MB"), - HIVECONVERTJOINUSENONSTAGED("hive.auto.convert.join.use.nonstaged", false, - "For conditional joins, if input stream from a small alias can be directly applied to join operator without \n" + - "filtering or projection, the alias need not to be pre-staged in distributed cache via mapred local task.\n" + - "Currently, this is not working with vectorization or tez execution engine."), - HIVESKEWJOINKEY("hive.skewjoin.key", 100000, - "Determine if we get a skew key in join. If we see more than the specified number of rows with the same key in join operator,\n" + - "we think the key as a skew join key. "), - HIVESKEWJOINMAPJOINNUMMAPTASK("hive.skewjoin.mapjoin.map.tasks", 10000, - "Determine the number of map task used in the follow up map join job for a skew join.\n" + - "It should be used together with hive.skewjoin.mapjoin.min.split to perform a fine grained control."), - HIVESKEWJOINMAPJOINMINSPLIT("hive.skewjoin.mapjoin.min.split", 33554432L, - "Determine the number of map task at most used in the follow up map join job for a skew join by specifying \n" + - "the minimum split size. It should be used together with hive.skewjoin.mapjoin.map.tasks to perform a fine grained control."), - - HIVESENDHEARTBEAT("hive.heartbeat.interval", 1000, - "Send a heartbeat after this interval - used by mapjoin and filter operators"), - HIVELIMITMAXROWSIZE("hive.limit.row.max.size", 100000L, - "When trying a smaller subset of data for simple LIMIT, how much size we need to guarantee each row to have at least."), - HIVELIMITOPTLIMITFILE("hive.limit.optimize.limit.file", 10, - "When trying a smaller subset of data for simple LIMIT, maximum number of files we can sample."), - HIVELIMITOPTENABLE("hive.limit.optimize.enable", false, - "Whether to enable to optimization to trying a smaller subset of data for simple LIMIT first."), - HIVELIMITOPTMAXFETCH("hive.limit.optimize.fetch.max", 50000, - "Maximum number of rows allowed for a smaller subset of data for simple LIMIT, if it is a fetch query. 
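The noconditionaltask conversion described above hinges on the summed size of the n-1 small join inputs versus hive.auto.convert.join.noconditionaltask.size. A simplified, hypothetical rendering of that check; the input sizes are invented, and the real optimizer works on operator statistics rather than literal byte arrays.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

public class MapJoinConversionCheck {                 // hypothetical, simplified decision logic
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    boolean autoConvert   = conf.getBoolVar(ConfVars.HIVECONVERTJOIN);
    boolean noConditional = conf.getBoolVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASK);
    long    sizeThreshold = conf.getLongVar(ConfVars.HIVECONVERTJOINNOCONDITIONALTASKTHRESHOLD); // 10 MB
    // For an n-way join the sum of the n-1 smaller inputs is compared against the threshold.
    long[] smallInputBytes = {3_000_000L, 4_000_000L};   // made-up sizes for two small tables
    long sum = 0;
    for (long b : smallInputBytes) {
      sum += b;
    }
    boolean directMapJoin = autoConvert && noConditional && sum < sizeThreshold;
    System.out.println("sum=" + sum + " bytes, converted to mapjoin without conditional task: " + directMapJoin);
  }
}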
\n" + - "Insert queries are not restricted by this limit."), - HIVELIMITPUSHDOWNMEMORYUSAGE("hive.limit.pushdown.memory.usage", 0.1f, new RatioValidator(), - "The fraction of available memory to be used for buffering rows in Reducesink operator for limit pushdown optimization."), - - @Deprecated - HIVELIMITTABLESCANPARTITION("hive.limit.query.max.table.partition", -1, - "This controls how many partitions can be scanned for each partitioned table.\n" + - "The default value \"-1\" means no limit. (DEPRECATED: Please use " + ConfVars.METASTORE_LIMIT_PARTITION_REQUEST + " in the metastore instead.)"), - - HIVECONVERTJOINMAXENTRIESHASHTABLE("hive.auto.convert.join.hashtable.max.entries", 40000000L, - "If hive.auto.convert.join.noconditionaltask is off, this parameter does not take affect. \n" + - "However, if it is on, and the predicated number of entries in hashtable for a given join \n" + - "input is larger than this number, the join will not be converted to a mapjoin. \n" + - "The value \"-1\" means no limit."), - HIVEHASHTABLEKEYCOUNTADJUSTMENT("hive.hashtable.key.count.adjustment", 1.0f, - "Adjustment to mapjoin hashtable size derived from table and column statistics; the estimate" + - " of the number of keys is divided by this value. If the value is 0, statistics are not used" + - "and hive.hashtable.initialCapacity is used instead."), - HIVEHASHTABLETHRESHOLD("hive.hashtable.initialCapacity", 100000, "Initial capacity of " + - "mapjoin hashtable if statistics are absent, or if hive.hashtable.key.count.adjustment is set to 0"), - HIVEHASHTABLELOADFACTOR("hive.hashtable.loadfactor", (float) 0.75, ""), - HIVEHASHTABLEFOLLOWBYGBYMAXMEMORYUSAGE("hive.mapjoin.followby.gby.localtask.max.memory.usage", (float) 0.55, - "This number means how much memory the local task can take to hold the key/value into an in-memory hash table \n" + - "when this map join is followed by a group by. If the local task's memory usage is more than this number, \n" + - "the local task will abort by itself. It means the data of the small table is too large to be held in memory."), - HIVEHASHTABLEMAXMEMORYUSAGE("hive.mapjoin.localtask.max.memory.usage", (float) 0.90, - "This number means how much memory the local task can take to hold the key/value into an in-memory hash table. \n" + - "If the local task's memory usage is more than this number, the local task will abort by itself. \n" + - "It means the data of the small table is too large to be held in memory."), - HIVEHASHTABLESCALE("hive.mapjoin.check.memory.rows", (long)100000, - "The number means after how many rows processed it needs to check the memory usage"), - - HIVEDEBUGLOCALTASK("hive.debug.localtask",false, ""), - - HIVEINPUTFORMAT("hive.input.format", "org.apache.hadoop.hive.ql.io.CombineHiveInputFormat", - "The default input format. Set this to HiveInputFormat if you encounter problems with CombineHiveInputFormat."), - HIVETEZINPUTFORMAT("hive.tez.input.format", "org.apache.hadoop.hive.ql.io.HiveInputFormat", - "The default input format for tez. Tez groups splits in the AM."), - - HIVETEZCONTAINERSIZE("hive.tez.container.size", -1, - "By default Tez will spawn containers of the size of a mapper. This can be used to overwrite."), - HIVETEZCPUVCORES("hive.tez.cpu.vcores", -1, - "By default Tez will ask for however many cpus map-reduce is configured to use per container.\n" + - "This can be used to overwrite."), - HIVETEZJAVAOPTS("hive.tez.java.opts", null, - "By default Tez will use the Java options from map tasks. 
This can be used to overwrite."), - HIVETEZLOGLEVEL("hive.tez.log.level", "INFO", - "The log level to use for tasks executing as part of the DAG.\n" + - "Used only if hive.tez.java.opts is used to configure Java options."), - HIVETEZHS2USERACCESS("hive.tez.hs2.user.access", true, - "Whether to grant access to the hs2/hive user for queries"), - HIVEQUERYNAME ("hive.query.name", null, - "This named is used by Tez to set the dag name. This name in turn will appear on \n" + - "the Tez UI representing the work that was done."), - - HIVEOPTIMIZEBUCKETINGSORTING("hive.optimize.bucketingsorting", true, - "Don't create a reducer for enforcing \n" + - "bucketing/sorting for queries of the form: \n" + - "insert overwrite table T2 select * from T1;\n" + - "where T1 and T2 are bucketed/sorted by the same keys into the same number of buckets."), - HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner", ""), - HIVEENFORCESORTMERGEBUCKETMAPJOIN("hive.enforce.sortmergebucketmapjoin", false, - "If the user asked for sort-merge bucketed map-side join, and it cannot be performed, should the query fail or not ?"), - HIVEENFORCEBUCKETMAPJOIN("hive.enforce.bucketmapjoin", false, - "If the user asked for bucketed map-side join, and it cannot be performed, \n" + - "should the query fail or not ? For example, if the buckets in the tables being joined are\n" + - "not a multiple of each other, bucketed map-side join cannot be performed, and the\n" + - "query will fail if hive.enforce.bucketmapjoin is set to true."), - - HIVE_AUTO_SORTMERGE_JOIN("hive.auto.convert.sortmerge.join", false, - "Will the join be automatically converted to a sort-merge join, if the joined tables pass the criteria for sort-merge join."), - HIVE_AUTO_SORTMERGE_JOIN_REDUCE("hive.auto.convert.sortmerge.join.reduce.side", true, - "Whether hive.auto.convert.sortmerge.join (if enabled) should be applied to reduce side."), - HIVE_AUTO_SORTMERGE_JOIN_BIGTABLE_SELECTOR( - "hive.auto.convert.sortmerge.join.bigtable.selection.policy", - "org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ", - "The policy to choose the big table for automatic conversion to sort-merge join. \n" + - "By default, the table with the largest partitions is assigned the big table. All policies are:\n" + - ". based on position of the table - the leftmost table is selected\n" + - "org.apache.hadoop.hive.ql.optimizer.LeftmostBigTableSMJ.\n" + - ". based on total size (all the partitions selected in the query) of the table \n" + - "org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ.\n" + - ". based on average size (all the partitions selected in the query) of the table \n" + - "org.apache.hadoop.hive.ql.optimizer.AvgPartitionSizeBasedBigTableSelectorForAutoSMJ.\n" + - "New policies can be added in future."), - HIVE_AUTO_SORTMERGE_JOIN_TOMAPJOIN( - "hive.auto.convert.sortmerge.join.to.mapjoin", false, - "If hive.auto.convert.sortmerge.join is set to true, and a join was converted to a sort-merge join, \n" + - "this parameter decides whether each table should be tried as a big table, and effectively a map-join should be\n" + - "tried. That would create a conditional task with n+1 children for a n-way join (1 child for each table as the\n" + - "big table), and the backup task will be the sort-merge join. In some cases, a map-join would be faster than a\n" + - "sort-merge join, if there is no advantage of having the output bucketed and sorted. 
For example, if a very big sorted\n" + - "and bucketed table with few files (say 10 files) are being joined with a very small sorter and bucketed table\n" + - "with few files (10 files), the sort-merge join will only use 10 mappers, and a simple map-only join might be faster\n" + - "if the complete small table can fit in memory, and a map-join can be performed."), - - HIVESCRIPTOPERATORTRUST("hive.exec.script.trust", false, ""), - HIVEROWOFFSET("hive.exec.rowoffset", false, - "Whether to provide the row offset virtual column"), - - // Optimizer - HIVEOPTINDEXFILTER("hive.optimize.index.filter", false, - "Whether to enable automatic use of indexes"), - HIVEINDEXAUTOUPDATE("hive.optimize.index.autoupdate", false, - "Whether to update stale indexes automatically"), - HIVEOPTPPD("hive.optimize.ppd", true, - "Whether to enable predicate pushdown"), - HIVEOPTPPD_WINDOWING("hive.optimize.ppd.windowing", true, - "Whether to enable predicate pushdown through windowing"), - HIVEPPDRECOGNIZETRANSITIVITY("hive.ppd.recognizetransivity", true, - "Whether to transitively replicate predicate filters over equijoin conditions."), - HIVEPPDREMOVEDUPLICATEFILTERS("hive.ppd.remove.duplicatefilters", true, - "During query optimization, filters may be pushed down in the operator tree. \n" + - "If this config is true only pushed down filters remain in the operator tree, \n" + - "and the original filter is removed. If this config is false, the original filter \n" + - "is also left in the operator tree at the original place."), - HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true, - "Whether to transform OR clauses in Filter operators into IN clauses"), - HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 31, - "Minimum number of OR clauses needed to transform into IN clauses"), - HIVECOUNTDISTINCTOPTIMIZER("hive.optimize.countdistinct", true, - "Whether to transform count distinct into two stages"), - HIVEPARTITIONCOLUMNSEPARATOR("hive.optimize.partition.columns.separate", true, - "Extract partition columns from IN clauses"), - // Constant propagation optimizer - HIVEOPTCONSTANTPROPAGATION("hive.optimize.constant.propagation", true, "Whether to enable constant propagation optimizer"), - HIVEIDENTITYPROJECTREMOVER("hive.optimize.remove.identity.project", true, "Removes identity project from operator tree"), - HIVEMETADATAONLYQUERIES("hive.optimize.metadataonly", false, - "Whether to eliminate scans of the tables from which no columns are selected. Note\n" + - "that, when selecting from empty tables with data files, this can produce incorrect\n" + - "results, so it's disabled by default. It works correctly for normal tables."), - HIVENULLSCANOPTIMIZE("hive.optimize.null.scan", true, "Dont scan relations which are guaranteed to not generate any rows"), - HIVEOPTPPD_STORAGE("hive.optimize.ppd.storage", true, - "Whether to push predicates down to storage handlers"), - HIVEOPTGROUPBY("hive.optimize.groupby", true, - "Whether to enable the bucketed group by from bucketed partitions/tables."), - HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false, - "Whether to try bucket mapjoin"), - HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false, - "Whether to try sorted bucket merge map join"), - HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true, - "Remove extra map-reduce jobs if the data is already clustered by the same key which needs to be used again. \n" + - "This should always be set to true. 
Since it is a new feature, it has been made configurable."), - HIVEOPTREDUCEDEDUPLICATIONMINREDUCER("hive.optimize.reducededuplication.min.reducer", 4, - "Reduce deduplication merges two RSs by moving key/parts/reducer-num of the child RS to parent RS. \n" + - "That means if reducer-num of the child RS is fixed (order by or forced bucketing) and small, it can make very slow, single MR.\n" + - "The optimization will be automatically disabled if number of reducers would be less than specified value."), - - HIVEOPTSORTDYNAMICPARTITION("hive.optimize.sort.dynamic.partition", false, - "When enabled dynamic partitioning column will be globally sorted.\n" + - "This way we can keep only one record writer open for each partition value\n" + - "in the reducer thereby reducing the memory pressure on reducers."), - - HIVESAMPLINGFORORDERBY("hive.optimize.sampling.orderby", false, "Uses sampling on order-by clause for parallel execution."), - HIVESAMPLINGNUMBERFORORDERBY("hive.optimize.sampling.orderby.number", 1000, "Total number of samples to be obtained."), - HIVESAMPLINGPERCENTFORORDERBY("hive.optimize.sampling.orderby.percent", 0.1f, new RatioValidator(), - "Probability with which a row will be chosen."), - HIVEOPTIMIZEDISTINCTREWRITE("hive.optimize.distinct.rewrite", true, "When applicable this " - + "optimization rewrites distinct aggregates from a single stage to multi-stage " - + "aggregation. This may not be optimal in all cases. Ideally, whether to trigger it or " - + "not should be cost based decision. Until Hive formalizes cost model for this, this is config driven."), - // whether to optimize union followed by select followed by filesink - // It creates sub-directories in the final output, so should not be turned on in systems - // where MAPREDUCE-1501 is not present - HIVE_OPTIMIZE_UNION_REMOVE("hive.optimize.union.remove", false, - "Whether to remove the union and push the operators between union and the filesink above union. \n" + - "This avoids an extra scan of the output by union. This is independently useful for union\n" + - "queries, and specially useful when hive.optimize.skewjoin.compiletime is set to true, since an\n" + - "extra union is inserted.\n" + - "\n" + - "The merge is triggered if either of hive.merge.mapfiles or hive.merge.mapredfiles is set to true.\n" + - "If the user has set hive.merge.mapfiles to true and hive.merge.mapredfiles to false, the idea was the\n" + - "number of reducers are few, so the number of files anyway are small. However, with this optimization,\n" + - "we are increasing the number of files possibly by a big margin. So, we merge aggressively."), - HIVEOPTCORRELATION("hive.optimize.correlation", false, "exploit intra-query correlations."), - - HIVE_OPTIMIZE_LIMIT_TRANSPOSE("hive.optimize.limittranspose", false, - "Whether to push a limit through left/right outer join or union. 
If the value is true and the size of the outer\n" + - "input is reduced enough (as specified in hive.optimize.limittranspose.reduction), the limit is pushed\n" + - "to the outer input or union; to remain semantically correct, the limit is kept on top of the join or the union too."), - HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_PERCENTAGE("hive.optimize.limittranspose.reductionpercentage", 1.0f, - "When hive.optimize.limittranspose is true, this variable specifies the minimal reduction of the\n" + - "size of the outer input of the join or input of the union that we should get in order to apply the rule."), - HIVE_OPTIMIZE_LIMIT_TRANSPOSE_REDUCTION_TUPLES("hive.optimize.limittranspose.reductiontuples", (long) 0, - "When hive.optimize.limittranspose is true, this variable specifies the minimal reduction in the\n" + - "number of tuples of the outer input of the join or the input of the union that you should get in order to apply the rule."), - - HIVE_OPTIMIZE_REDUCE_WITH_STATS("hive.optimize.filter.stats.reduction", false, "Whether to simplify comparison\n" + - "expressions in filter operators using column stats"), - - HIVE_OPTIMIZE_SKEWJOIN_COMPILETIME("hive.optimize.skewjoin.compiletime", false, - "Whether to create a separate plan for skewed keys for the tables in the join.\n" + - "This is based on the skewed keys stored in the metadata. At compile time, the plan is broken\n" + - "into different joins: one for the skewed keys, and the other for the remaining keys. And then,\n" + - "a union is performed for the 2 joins generated above. So unless the same skewed key is present\n" + - "in both the joined tables, the join for the skewed key will be performed as a map-side join.\n" + - "\n" + - "The main difference between this parameter and hive.optimize.skewjoin is that this parameter\n" + - "uses the skew information stored in the metastore to optimize the plan at compile time itself.\n" + - "If there is no skew information in the metadata, this parameter will not have any affect.\n" + - "Both hive.optimize.skewjoin.compiletime and hive.optimize.skewjoin should be set to true.\n" + - "Ideally, hive.optimize.skewjoin should be renamed as hive.optimize.skewjoin.runtime, but not doing\n" + - "so for backward compatibility.\n" + - "\n" + - "If the skew information is correctly stored in the metadata, hive.optimize.skewjoin.compiletime\n" + - "would change the query plan to take care of it, and hive.optimize.skewjoin will be a no-op."), - - HIVE_SHARED_WORK_OPTIMIZATION("hive.optimize.shared.work", true, - "Whether to enable shared work optimizer. The optimizer finds scan operator over the same table\n" + - "and follow-up operators in the query plan and merges them if they meet some preconditions."), - - // CTE - HIVE_CTE_MATERIALIZE_THRESHOLD("hive.optimize.cte.materialize.threshold", -1, - "If the number of references to a CTE clause exceeds this threshold, Hive will materialize it\n" + - "before executing the main query block. -1 will disable this feature."), - - // Indexes - HIVEOPTINDEXFILTER_COMPACT_MINSIZE("hive.optimize.index.filter.compact.minsize", (long) 5 * 1024 * 1024 * 1024, - "Minimum size (in bytes) of the inputs on which a compact index is automatically used."), // 5G - HIVEOPTINDEXFILTER_COMPACT_MAXSIZE("hive.optimize.index.filter.compact.maxsize", (long) -1, - "Maximum size (in bytes) of the inputs on which a compact index is automatically used. 
A negative number is equivalent to infinity."), // infinity - HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES("hive.index.compact.query.max.entries", (long) 10000000, - "The maximum number of index entries to read during a query that uses the compact index. Negative value is equivalent to infinity."), // 10M - HIVE_INDEX_COMPACT_QUERY_MAX_SIZE("hive.index.compact.query.max.size", (long) 10 * 1024 * 1024 * 1024, - "The maximum number of bytes that a query using the compact index can read. Negative value is equivalent to infinity."), // 10G - HIVE_INDEX_COMPACT_BINARY_SEARCH("hive.index.compact.binary.search", true, - "Whether or not to use a binary search to find the entries in an index table that match the filter, where possible"), - - // Statistics - HIVESTATSAUTOGATHER("hive.stats.autogather", true, - "A flag to gather statistics (only basic) automatically during the INSERT OVERWRITE command."), - HIVESTATSCOLAUTOGATHER("hive.stats.column.autogather", false, - "A flag to gather column statistics automatically."), - HIVESTATSDBCLASS("hive.stats.dbclass", "fs", new PatternSet("custom", "fs"), - "The storage that stores temporary Hive statistics. In filesystem based statistics collection ('fs'), \n" + - "each task writes statistics it has collected in a file on the filesystem, which will be aggregated \n" + - "after the job has finished. Supported values are fs (filesystem) and custom as defined in StatsSetupConst.java."), // StatsSetupConst.StatDB - HIVE_STATS_DEFAULT_PUBLISHER("hive.stats.default.publisher", "", - "The Java class (implementing the StatsPublisher interface) that is used by default if hive.stats.dbclass is custom type."), - HIVE_STATS_DEFAULT_AGGREGATOR("hive.stats.default.aggregator", "", - "The Java class (implementing the StatsAggregator interface) that is used by default if hive.stats.dbclass is custom type."), - HIVE_STATS_ATOMIC("hive.stats.atomic", false, - "whether to update metastore stats only if all stats are available"), - CLIENT_STATS_COUNTERS("hive.client.stats.counters", "", - "Subset of counters that should be of interest for hive.client.stats.publishers (when one wants to limit their publishing). \n" + - "Non-display names should be used"), - //Subset of counters that should be of interest for hive.client.stats.publishers (when one wants to limit their publishing). Non-display names should be used". - HIVE_STATS_RELIABLE("hive.stats.reliable", false, - "Whether queries will fail because stats cannot be collected completely accurately. \n" + - "If this is set to true, reading/writing from/into a partition may fail because the stats\n" + - "could not be computed accurately."), - HIVE_STATS_COLLECT_PART_LEVEL_STATS("hive.analyze.stmt.collect.partlevel.stats", true, - "analyze table T compute statistics for columns. 
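A short sketch of turning on the automatic statistics collection described above; the values are only examples, and "custom" would additionally require hive.stats.default.publisher and hive.stats.default.aggregator:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class StatsGatherSetup {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Basic stats are gathered during INSERT OVERWRITE when this is true (the default).
        conf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, true);
        // Column statistics are off by default; enable them explicitly.
        conf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, true);
        // "fs": each task writes its stats to a file that is aggregated after the job finishes.
        conf.setVar(HiveConf.ConfVars.HIVESTATSDBCLASS, "fs");
        System.out.println("stats dbclass = " + conf.getVar(HiveConf.ConfVars.HIVESTATSDBCLASS));
      }
    }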
Queries like these should compute partition" - + "level stats for partitioned table even when no part spec is specified."), - HIVE_STATS_GATHER_NUM_THREADS("hive.stats.gather.num.threads", 10, - "Number of threads used by partialscan/noscan analyze command for partitioned tables.\n" + - "This is applicable only for file formats that implement StatsProvidingRecordReader (like ORC)."), - // Collect table access keys information for operators that can benefit from bucketing - HIVE_STATS_COLLECT_TABLEKEYS("hive.stats.collect.tablekeys", false, - "Whether join and group by keys on tables are derived and maintained in the QueryPlan.\n" + - "This is useful to identify how tables are accessed and to determine if they should be bucketed."), - // Collect column access information - HIVE_STATS_COLLECT_SCANCOLS("hive.stats.collect.scancols", false, - "Whether column accesses are tracked in the QueryPlan.\n" + - "This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed."), - // standard error allowed for ndv estimates. A lower value indicates higher accuracy and a - // higher compute cost. - HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0, - "Standard error expressed in percentage. Provides a tradeoff between accuracy and compute cost. \n" + - "A lower value for error indicates higher accuracy and a higher compute cost."), - HIVE_METASTORE_STATS_NDV_TUNER("hive.metastore.stats.ndv.tuner", (float)0.0, - "Provides a tunable parameter between the lower bound and the higher bound of ndv for aggregate ndv across all the partitions. \n" + - "The lower bound is equal to the maximum of ndv of all the partitions. The higher bound is equal to the sum of ndv of all the partitions.\n" + - "Its value should be between 0.0 (i.e., choose lower bound) and 1.0 (i.e., choose higher bound)"), - HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION("hive.metastore.stats.ndv.densityfunction", false, - "Whether to use density function to estimate the NDV for the whole table based on the NDV of partitions"), - HIVE_STATS_KEY_PREFIX("hive.stats.key.prefix", "", "", true), // internal usage only - // if length of variable length data type cannot be determined this length will be used. - HIVE_STATS_MAX_VARIABLE_LENGTH("hive.stats.max.variable.length", 100, - "To estimate the size of data flowing through operators in Hive/Tez(for reducer estimation etc.),\n" + - "average row size is multiplied with the total number of rows coming out of each operator.\n" + - "Average row size is computed from average column size of all columns in the row. In the absence\n" + - "of column statistics, for variable length columns (like string, bytes etc.), this value will be\n" + - "used. For fixed length columns their corresponding Java equivalent sizes are used\n" + - "(float - 4 bytes, double - 8 bytes etc.)."), - // if number of elements in list cannot be determined, this value will be used - HIVE_STATS_LIST_NUM_ENTRIES("hive.stats.list.num.entries", 10, - "To estimate the size of data flowing through operators in Hive/Tez(for reducer estimation etc.),\n" + - "average row size is multiplied with the total number of rows coming out of each operator.\n" + - "Average row size is computed from average column size of all columns in the row. 
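The NDV aggregation knobs lend themselves to a worked example. The bounds come straight from the hive.metastore.stats.ndv.tuner description (lower bound = maximum per-partition NDV, upper bound = their sum); the linear blend below is an illustrative reading of "between the lower bound and the higher bound", not a quote of the metastore code:

    public class NdvTunerExample {
      public static void main(String[] args) {
        long[] partitionNdv = {120, 90, 100};    // per-partition NDVs for one column
        long lower = 0, upper = 0;
        for (long ndv : partitionNdv) {
          lower = Math.max(lower, ndv);          // lower bound: max over partitions
          upper += ndv;                          // upper bound: sum over partitions
        }
        double tuner = 0.5;                      // hive.metastore.stats.ndv.tuner, in [0.0, 1.0]
        double aggregateNdv = lower + tuner * (upper - lower);
        // lower=120, upper=310, aggregate=215.0 for tuner=0.5
        System.out.println("lower=" + lower + " upper=" + upper + " aggregate=" + aggregateNdv);
      }
    }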
In the absence\n" + - "of column statistics and for variable length complex columns like list, the average number of\n" + - "entries/values can be specified using this config."), - // if number of elements in map cannot be determined, this value will be used - HIVE_STATS_MAP_NUM_ENTRIES("hive.stats.map.num.entries", 10, - "To estimate the size of data flowing through operators in Hive/Tez(for reducer estimation etc.),\n" + - "average row size is multiplied with the total number of rows coming out of each operator.\n" + - "Average row size is computed from average column size of all columns in the row. In the absence\n" + - "of column statistics and for variable length complex columns like map, the average number of\n" + - "entries/values can be specified using this config."), - // statistics annotation fetches stats for each partition, which can be expensive. turning - // this off will result in basic sizes being fetched from namenode instead - HIVE_STATS_FETCH_PARTITION_STATS("hive.stats.fetch.partition.stats", true, - "Annotation of operator tree with statistics information requires partition level basic\n" + - "statistics like number of rows, data size and file size. Partition statistics are fetched from\n" + - "metastore. Fetching partition statistics for each needed partition can be expensive when the\n" + - "number of partitions is high. This flag can be used to disable fetching of partition statistics\n" + - "from metastore. When this flag is disabled, Hive will make calls to filesystem to get file sizes\n" + - "and will estimate the number of rows from row schema."), - // statistics annotation fetches column statistics for all required columns which can - // be very expensive sometimes - HIVE_STATS_FETCH_COLUMN_STATS("hive.stats.fetch.column.stats", false, - "Annotation of operator tree with statistics information requires column statistics.\n" + - "Column statistics are fetched from metastore. Fetching column statistics for each needed column\n" + - "can be expensive when the number of columns is high. This flag can be used to disable fetching\n" + - "of column statistics from metastore."), - // in the absence of column statistics, the estimated number of rows/data size that will - // be emitted from join operator will depend on this factor - HIVE_STATS_JOIN_FACTOR("hive.stats.join.factor", (float) 1.1, - "Hive/Tez optimizer estimates the data size flowing through each of the operators. JOIN operator\n" + - "uses column statistics to estimate the number of rows flowing out of it and hence the data size.\n" + - "In the absence of column statistics, this factor determines the amount of rows that flows out\n" + - "of JOIN operator."), - HIVE_STATS_CORRELATED_MULTI_KEY_JOINS("hive.stats.correlated.multi.key.joins", false, - "When estimating output rows for a join involving multiple columns, the default behavior assumes" + - "the columns are independent. Setting this flag to true will cause the estimator to assume" + - "the columns are correlated."), - // in the absence of uncompressed/raw data size, total file size will be used for statistics - // annotation. But the file may be compressed, encoded and serialized which may be lesser in size - // than the actual uncompressed/raw data size. This factor will be multiplied to file size to estimate - // the raw data size. - HIVE_STATS_DESERIALIZATION_FACTOR("hive.stats.deserialization.factor", (float) 1.0, - "Hive/Tez optimizer estimates the data size flowing through each of the operators. 
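The row-size configs above combine through simple arithmetic: fixed-length columns contribute their Java sizes, variable-length and complex columns fall back to the defaults above, and the per-row total is multiplied by the row count. A back-of-the-envelope sketch with an assumed column mix:

    public class RowSizeEstimate {
      public static void main(String[] args) {
        long numRows = 1_000_000L;
        long intCol = 4, doubleCol = 8;          // fixed-length columns use their Java sizes
        long stringCol = 100;                    // no column stats: hive.stats.max.variable.length
        long listCol = 10 * stringCol;           // assumed list of strings: hive.stats.list.num.entries entries
        long avgRowSize = intCol + doubleCol + stringCol + listCol;   // 1112 bytes per row
        long estimatedSize = avgRowSize * numRows;                    // ~1.1 GB out of this operator
        System.out.println(avgRowSize + " bytes/row, " + estimatedSize + " bytes total");
      }
    }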
In the absence\n" + - "of basic statistics like number of rows and data size, file size is used to estimate the number\n" + - "of rows and data size. Since files in tables/partitions are serialized (and optionally\n" + - "compressed) the estimates of number of rows and data size cannot be reliably determined.\n" + - "This factor is multiplied with the file size to account for serialization and compression."), - HIVE_STATS_IN_CLAUSE_FACTOR("hive.stats.filter.in.factor", (float) 1.0, - "Currently column distribution is assumed to be uniform. This can lead to overestimation/underestimation\n" + - "in the number of rows filtered by a certain operator, which in turn might lead to overprovision or\n" + - "underprovision of resources. This factor is applied to the cardinality estimation of IN clauses in\n" + - "filter operators."), - - // Concurrency - HIVE_SUPPORT_CONCURRENCY("hive.support.concurrency", false, - "Whether Hive supports concurrency control or not. \n" + - "A ZooKeeper instance must be up and running when using zookeeper Hive lock manager "), - HIVE_LOCK_MANAGER("hive.lock.manager", "org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager", ""), - HIVE_LOCK_NUMRETRIES("hive.lock.numretries", 100, - "The number of times you want to try to get all the locks"), - HIVE_UNLOCK_NUMRETRIES("hive.unlock.numretries", 10, - "The number of times you want to retry to do one unlock"), - HIVE_LOCK_SLEEP_BETWEEN_RETRIES("hive.lock.sleep.between.retries", "60s", - new TimeValidator(TimeUnit.SECONDS, 0L, false, Long.MAX_VALUE, false), - "The maximum sleep time between various retries"), - HIVE_LOCK_MAPRED_ONLY("hive.lock.mapred.only.operation", false, - "This param is to control whether or not only do lock on queries\n" + - "that need to execute at least one mapred job."), - HIVE_LOCK_QUERY_STRING_MAX_LENGTH("hive.lock.query.string.max.length", 1000000, - "The maximum length of the query string to store in the lock.\n" + - "The default value is 1000000, since the data limit of a znode is 1MB"), - - // Zookeeper related configs - HIVE_ZOOKEEPER_QUORUM("hive.zookeeper.quorum", "", - "List of ZooKeeper servers to talk to. This is needed for: \n" + - "1. Read/write locks - when hive.lock.manager is set to \n" + - "org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager, \n" + - "2. When HiveServer2 supports service discovery via Zookeeper.\n" + - "3. For delegation token storage if zookeeper store is used, if\n" + - "hive.cluster.delegation.token.store.zookeeper.connectString is not set\n" + - "4. LLAP daemon registry service"), - - HIVE_ZOOKEEPER_CLIENT_PORT("hive.zookeeper.client.port", "2181", - "The port of ZooKeeper servers to talk to.\n" + - "If the list of Zookeeper servers specified in hive.zookeeper.quorum\n" + - "does not contain port numbers, this value is used."), - HIVE_ZOOKEEPER_SESSION_TIMEOUT("hive.zookeeper.session.timeout", "1200000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "ZooKeeper client's session timeout (in milliseconds). 
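A minimal sketch of the three settings the hive.support.concurrency description ties together (host names are placeholders; the port falls back to hive.zookeeper.client.port when omitted from the quorum):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class ZkLockManagerSetup {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
        conf.setVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER,
            "org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager");
        conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM,
            "zk1.example.com,zk2.example.com,zk3.example.com");
        System.out.println("lock manager = " + conf.getVar(HiveConf.ConfVars.HIVE_LOCK_MANAGER));
      }
    }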
The client is disconnected, and as a result, all locks released, \n" + - "if a heartbeat is not sent in the timeout."), - HIVE_ZOOKEEPER_NAMESPACE("hive.zookeeper.namespace", "hive_zookeeper_namespace", - "The parent node under which all ZooKeeper nodes are created."), - HIVE_ZOOKEEPER_CLEAN_EXTRA_NODES("hive.zookeeper.clean.extra.nodes", false, - "Clean extra nodes at the end of the session."), - HIVE_ZOOKEEPER_CONNECTION_MAX_RETRIES("hive.zookeeper.connection.max.retries", 3, - "Max number of times to retry when connecting to the ZooKeeper server."), - HIVE_ZOOKEEPER_CONNECTION_BASESLEEPTIME("hive.zookeeper.connection.basesleeptime", "1000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Initial amount of time (in milliseconds) to wait between retries\n" + - "when connecting to the ZooKeeper server when using ExponentialBackoffRetry policy."), - - // Transactions - HIVE_TXN_MANAGER("hive.txn.manager", - "org.apache.hadoop.hive.ql.lockmgr.DummyTxnManager", - "Set to org.apache.hadoop.hive.ql.lockmgr.DbTxnManager as part of turning on Hive\n" + - "transactions, which also requires appropriate settings for hive.compactor.initiator.on,\n" + - "hive.compactor.worker.threads, hive.support.concurrency (true),\n" + - "and hive.exec.dynamic.partition.mode (nonstrict).\n" + - "The default DummyTxnManager replicates pre-Hive-0.13 behavior and provides\n" + - "no transactions."), - HIVE_TXN_STRICT_LOCKING_MODE("hive.txn.strict.locking.mode", true, "In strict mode non-ACID\n" + - "resources use standard R/W lock semantics, e.g. INSERT will acquire exclusive lock.\n" + - "In nonstrict mode, for non-ACID resources, INSERT will only acquire shared lock, which\n" + - "allows two concurrent writes to the same partition but still lets lock manager prevent\n" + - "DROP TABLE etc. when the table is being written to"), - HIVE_TXN_TIMEOUT("hive.txn.timeout", "300s", new TimeValidator(TimeUnit.SECONDS), - "time after which transactions are declared aborted if the client has not sent a heartbeat."), - HIVE_TXN_HEARTBEAT_THREADPOOL_SIZE("hive.txn.heartbeat.threadpool.size", 5, "The number of " + - "threads to use for heartbeating. For Hive CLI, 1 is enough. For HiveServer2, we need a few"), - TXN_MGR_DUMP_LOCK_STATE_ON_ACQUIRE_TIMEOUT("hive.txn.manager.dump.lock.state.on.acquire.timeout", false, - "Set this to true so that when attempt to acquire a lock on resource times out, the current state" + - " of the lock manager is dumped to log file. This is for debugging. See also " + - "hive.lock.numretries and hive.lock.sleep.between.retries."), - - HIVE_TXN_OPERATIONAL_PROPERTIES("hive.txn.operational.properties", 0, - "Sets the operational properties that control the appropriate behavior for various\n" - + "versions of the Hive ACID subsystem. Setting it to zero will turn on the legacy mode\n" - + "for ACID, while setting it to one will enable a split-update feature found in the newer\n" - + "version of Hive ACID subsystem. Mostly it is intended to be used as an internal property\n" - + "for future versions of ACID. (See HIVE-14035 for details.)"), - - HIVE_MAX_OPEN_TXNS("hive.max.open.txns", 100000, "Maximum number of open transactions. 
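The hive.txn.manager description enumerates the settings that have to change together to enable ACID; a sketch that applies that checklist (hive.exec.dynamic.partition.mode is set through its plain key because its ConfVars constant is defined outside this hunk):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class AcidSetupSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        conf.setVar(HiveConf.ConfVars.HIVE_TXN_MANAGER,
            "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager");
        conf.setBoolVar(HiveConf.ConfVars.HIVE_SUPPORT_CONCURRENCY, true);
        // Run the compaction initiator/cleaner on one metastore instance and give it workers.
        conf.setBoolVar(HiveConf.ConfVars.HIVE_COMPACTOR_INITIATOR_ON, true);
        conf.setIntVar(HiveConf.ConfVars.HIVE_COMPACTOR_WORKER_THREADS, 2);
        conf.set("hive.exec.dynamic.partition.mode", "nonstrict");
        System.out.println("txn manager = " + conf.getVar(HiveConf.ConfVars.HIVE_TXN_MANAGER));
      }
    }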
If \n" + - "current open transactions reach this limit, future open transaction requests will be \n" + - "rejected, until this number goes below the limit."), - HIVE_COUNT_OPEN_TXNS_INTERVAL("hive.count.open.txns.interval", "1s", - new TimeValidator(TimeUnit.SECONDS), "Time in seconds between checks to count open transactions."), - - HIVE_TXN_MAX_OPEN_BATCH("hive.txn.max.open.batch", 1000, - "Maximum number of transactions that can be fetched in one call to open_txns().\n" + - "This controls how many transactions streaming agents such as Flume or Storm open\n" + - "simultaneously. The streaming agent then writes that number of entries into a single\n" + - "file (per Flume agent or Storm bolt). Thus increasing this value decreases the number\n" + - "of delta files created by streaming agents. But it also increases the number of open\n" + - "transactions that Hive has to track at any given time, which may negatively affect\n" + - "read performance."), - - HIVE_TXN_RETRYABLE_SQLEX_REGEX("hive.txn.retryable.sqlex.regex", "", "Comma separated list\n" + - "of regular expression patterns for SQL state, error code, and error message of\n" + - "retryable SQLExceptions, that's suitable for the metastore DB.\n" + - "For example: Can't serialize.*,40001$,^Deadlock,.*ORA-08176.*\n" + - "The string that the regex will be matched against is of the following form, where ex is a SQLException:\n" + - "ex.getMessage() + \" (SQLState=\" + ex.getSQLState() + \", ErrorCode=\" + ex.getErrorCode() + \")\""), - - HIVE_COMPACTOR_INITIATOR_ON("hive.compactor.initiator.on", false, - "Whether to run the initiator and cleaner threads on this metastore instance or not.\n" + - "Set this to true on one instance of the Thrift metastore service as part of turning\n" + - "on Hive transactions. For a complete list of parameters required for turning on\n" + - "transactions, see hive.txn.manager."), - - HIVE_COMPACTOR_WORKER_THREADS("hive.compactor.worker.threads", 0, - "How many compactor worker threads to run on this metastore instance. Set this to a\n" + - "positive number on one or more instances of the Thrift metastore service as part of\n" + - "turning on Hive transactions. For a complete list of parameters required for turning\n" + - "on transactions, see hive.txn.manager.\n" + - "Worker threads spawn MapReduce jobs to do compactions. They do not do the compactions\n" + - "themselves. Increasing the number of worker threads will decrease the time it takes\n" + - "tables or partitions to be compacted once they are determined to need compaction.\n" + - "It will also increase the background load on the Hadoop cluster as more MapReduce jobs\n" + - "will be running in the background."), - - HIVE_COMPACTOR_WORKER_TIMEOUT("hive.compactor.worker.timeout", "86400s", - new TimeValidator(TimeUnit.SECONDS), - "Time in seconds after which a compaction job will be declared failed and the\n" + - "compaction re-queued."), - - HIVE_COMPACTOR_CHECK_INTERVAL("hive.compactor.check.interval", "300s", - new TimeValidator(TimeUnit.SECONDS), - "Time in seconds between checks to see if any tables or partitions need to be\n" + - "compacted. This should be kept high because each check for compaction requires\n" + - "many calls against the NameNode.\n" + - "Decreasing this value will reduce the time it takes for compaction to be started\n" + - "for a table or partition that requires compaction. 
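hive.txn.retryable.sqlex.regex documents the exact string the patterns are matched against; the helper below only illustrates that contract (the comma split and Matcher.find() call are assumptions about a reasonable matcher, not the metastore implementation):

    import java.util.regex.Pattern;

    public class RetryableSqlExCheck {
      static boolean isRetryable(String regexList, String message, String sqlState, int errorCode) {
        // The documented form: ex.getMessage() + " (SQLState=" + ex.getSQLState() + ", ErrorCode=" + ex.getErrorCode() + ")"
        String candidate = message + " (SQLState=" + sqlState + ", ErrorCode=" + errorCode + ")";
        for (String regex : regexList.split(",")) {
          if (Pattern.compile(regex.trim()).matcher(candidate).find()) {
            return true;
          }
        }
        return false;
      }

      public static void main(String[] args) {
        String patterns = "Can't serialize.*,40001$,^Deadlock,.*ORA-08176.*";   // example from the description
        System.out.println(isRetryable(patterns, "Deadlock detected while updating TXNS", "40001", 1213)); // true
      }
    }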
However, checking if compaction\n" + - "is needed requires several calls to the NameNode for each table or partition that\n" + - "has had a transaction done on it since the last major compaction. So decreasing this\n" + - "value will increase the load on the NameNode."), - - HIVE_COMPACTOR_DELTA_NUM_THRESHOLD("hive.compactor.delta.num.threshold", 10, - "Number of delta directories in a table or partition that will trigger a minor\n" + - "compaction."), - - HIVE_COMPACTOR_DELTA_PCT_THRESHOLD("hive.compactor.delta.pct.threshold", 0.1f, - "Percentage (fractional) size of the delta files relative to the base that will trigger\n" + - "a major compaction. (1.0 = 100%, so the default 0.1 = 10%.)"), - COMPACTOR_MAX_NUM_DELTA("hive.compactor.max.num.delta", 500, "Maximum number of delta files that " + - "the compactor will attempt to handle in a single job."), - - HIVE_COMPACTOR_ABORTEDTXN_THRESHOLD("hive.compactor.abortedtxn.threshold", 1000, - "Number of aborted transactions involving a given table or partition that will trigger\n" + - "a major compaction."), - - COMPACTOR_INITIATOR_FAILED_THRESHOLD("hive.compactor.initiator.failed.compacts.threshold", 2, - new RangeValidator(1, 20), "Number of consecutive compaction failures (per table/partition) " + - "after which automatic compactions will not be scheduled any more. Note that this must be less " + - "than hive.compactor.history.retention.failed."), - - HIVE_COMPACTOR_CLEANER_RUN_INTERVAL("hive.compactor.cleaner.run.interval", "5000ms", - new TimeValidator(TimeUnit.MILLISECONDS), "Time between runs of the cleaner thread"), - COMPACTOR_JOB_QUEUE("hive.compactor.job.queue", "", "Used to specify name of Hadoop queue to which\n" + - "Compaction jobs will be submitted. Set to empty string to let Hadoop choose the queue."), - - COMPACTOR_HISTORY_RETENTION_SUCCEEDED("hive.compactor.history.retention.succeeded", 3, - new RangeValidator(0, 100), "Determines how many successful compaction records will be " + - "retained in compaction history for a given table/partition."), - - COMPACTOR_HISTORY_RETENTION_FAILED("hive.compactor.history.retention.failed", 3, - new RangeValidator(0, 100), "Determines how many failed compaction records will be " + - "retained in compaction history for a given table/partition."), - - COMPACTOR_HISTORY_RETENTION_ATTEMPTED("hive.compactor.history.retention.attempted", 2, - new RangeValidator(0, 100), "Determines how many attempted compaction records will be " + - "retained in compaction history for a given table/partition."), - - COMPACTOR_HISTORY_REAPER_INTERVAL("hive.compactor.history.reaper.interval", "2m", - new TimeValidator(TimeUnit.MILLISECONDS), "Determines how often compaction history reaper runs"), - - HIVE_TIMEDOUT_TXN_REAPER_START("hive.timedout.txn.reaper.start", "100s", - new TimeValidator(TimeUnit.MILLISECONDS), "Time delay of 1st reaper run after metastore start"), - HIVE_TIMEDOUT_TXN_REAPER_INTERVAL("hive.timedout.txn.reaper.interval", "180s", - new TimeValidator(TimeUnit.MILLISECONDS), "Time interval describing how often the reaper runs"), - WRITE_SET_REAPER_INTERVAL("hive.writeset.reaper.interval", "60s", - new TimeValidator(TimeUnit.MILLISECONDS), "Frequency of WriteSet reaper runs"), - - MERGE_CARDINALITY_VIOLATION_CHECK("hive.merge.cardinality.check", true, - "Set to true to ensure that each SQL Merge statement ensures that for each row in the target\n" + - "table there is at most 1 matching row in the source table per SQL Specification."), - - // For Druid storage handler - 
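The two delta thresholds are easiest to read with concrete numbers; the comparisons below are illustrative, the point is the ratio semantics spelled out for hive.compactor.delta.pct.threshold (0.1 = 10%):

    public class CompactionTriggerExample {
      public static void main(String[] args) {
        int deltaDirs = 12;                              // vs. hive.compactor.delta.num.threshold = 10
        boolean minorCandidate = deltaDirs >= 10;
        long baseBytes  = 100L * 1024 * 1024 * 1024;     // 100 GB base
        long deltaBytes =  12L * 1024 * 1024 * 1024;     //  12 GB of deltas
        double ratio = (double) deltaBytes / baseBytes;  // 0.12, i.e. 12%
        boolean majorCandidate = ratio > 0.1;            // vs. hive.compactor.delta.pct.threshold = 0.1
        System.out.println("minor=" + minorCandidate + ", ratio=" + ratio + ", major=" + majorCandidate);
      }
    }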
HIVE_DRUID_INDEXING_GRANULARITY("hive.druid.indexer.segments.granularity", "DAY", - new PatternSet("YEAR", "MONTH", "WEEK", "DAY", "HOUR", "MINUTE", "SECOND"), - "Granularity for the segments created by the Druid storage handler" - ), - HIVE_DRUID_MAX_PARTITION_SIZE("hive.druid.indexer.partition.size.max", 5000000, - "Maximum number of records per segment partition" - ), - HIVE_DRUID_MAX_ROW_IN_MEMORY("hive.druid.indexer.memory.rownum.max", 75000, - "Maximum number of records in memory while storing data in Druid" - ), - HIVE_DRUID_BROKER_DEFAULT_ADDRESS("hive.druid.broker.address.default", "localhost:8082", - "Address of the Druid broker. If we are querying Druid from Hive, this address needs to be\n" - + - "declared" - ), - HIVE_DRUID_COORDINATOR_DEFAULT_ADDRESS("hive.druid.coordinator.address.default", "localhost:8081", - "Address of the Druid coordinator. It is used to check the load status of newly created segments" - ), - HIVE_DRUID_SELECT_DISTRIBUTE("hive.druid.select.distribute", true, - "If it is set to true, we distribute the execution of Druid Select queries. Concretely, we retrieve\n" + - "the result for Select queries directly from the Druid nodes containing the segments data.\n" + - "In particular, first we contact the Druid broker node to obtain the nodes containing the segments\n" + - "for the given query, and then we contact those nodes to retrieve the results for the query.\n" + - "If it is set to false, we do not execute the Select queries in a distributed fashion. Instead, results\n" + - "for those queries are returned by the Druid broker node."), - HIVE_DRUID_SELECT_THRESHOLD("hive.druid.select.threshold", 10000, - "Takes only effect when hive.druid.select.distribute is set to false. \n" + - "When we can split a Select query, this is the maximum number of rows that we try to retrieve\n" + - "per query. In order to do that, we obtain the estimated size for the complete result. If the\n" + - "number of records of the query results is larger than this threshold, we split the query in\n" + - "total number of rows/threshold parts across the time dimension. Note that we assume the\n" + - "records to be split uniformly across the time dimension."), - HIVE_DRUID_NUM_HTTP_CONNECTION("hive.druid.http.numConnection", 20, "Number of connections used by\n" + - "the HTTP client."), - HIVE_DRUID_HTTP_READ_TIMEOUT("hive.druid.http.read.timeout", "PT1M", "Read timeout period for the HTTP\n" + - "client in ISO8601 format (for example P2W, P3M, PT1H30M, PT0.750S), default is period of 1 minute."), - HIVE_DRUID_SLEEP_TIME("hive.druid.sleep.time", "PT10S", - "Sleep time between retries in ISO8601 format (for example P2W, P3M, PT1H30M, PT0.750S), default is period of 10 seconds." - ), - HIVE_DRUID_BASE_PERSIST_DIRECTORY("hive.druid.basePersistDirectory", "", - "Local temporary directory used to persist intermediate indexing state, will default to JVM system property java.io.tmpdir." - ), - DRUID_SEGMENT_DIRECTORY("hive.druid.storage.storageDirectory", "/druid/segments" - , "druid deep storage location."), - DRUID_METADATA_BASE("hive.druid.metadata.base", "druid", "Default prefix for metadata tables"), - DRUID_METADATA_DB_TYPE("hive.druid.metadata.db.type", "mysql", - new PatternSet("mysql", "postgresql"), "Type of the metadata database." - ), - DRUID_METADATA_DB_USERNAME("hive.druid.metadata.username", "", - "Username to connect to Type of the metadata DB." - ), - DRUID_METADATA_DB_PASSWORD("hive.druid.metadata.password", "", - "Password to connect to Type of the metadata DB." 
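The hive.druid.select.threshold description reduces to a split count; a worked version of that arithmetic (rounding up is an assumption so that no rows are dropped, and the split only applies when hive.druid.select.distribute is false):

    public class DruidSelectSplitExample {
      public static void main(String[] args) {
        long estimatedRows = 45_000;   // estimated size of the complete Select result
        long threshold = 10_000;       // hive.druid.select.threshold (default 10000)
        long parts = (estimatedRows + threshold - 1) / threshold;   // rows/threshold, rounded up
        System.out.println(parts + " partial Select queries across the time dimension");   // 5
      }
    }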
- ), - DRUID_METADATA_DB_URI("hive.druid.metadata.uri", "", - "URI to connect to the database (for example jdbc:mysql://hostname:port/DBName)." - ), - DRUID_WORKING_DIR("hive.druid.working.directory", "/tmp/workingDirectory", - "Default hdfs working directory used to store some intermediate metadata" - ), - HIVE_DRUID_MAX_TRIES("hive.druid.maxTries", 5, "Maximum number of retries before giving up"), - HIVE_DRUID_PASSIVE_WAIT_TIME("hive.druid.passiveWaitTimeMs", 30000, - "Wait time in ms default to 30 seconds." - ), - HIVE_DRUID_BITMAP_FACTORY_TYPE("hive.druid.bitmap.type", "roaring", new PatternSet("roaring", "concise"), "Coding algorithm use to encode the bitmaps"), - // For HBase storage handler - HIVE_HBASE_WAL_ENABLED("hive.hbase.wal.enabled", true, - "Whether writes to HBase should be forced to the write-ahead log. \n" + - "Disabling this improves HBase write performance at the risk of lost writes in case of a crash."), - HIVE_HBASE_GENERATE_HFILES("hive.hbase.generatehfiles", false, - "True when HBaseStorageHandler should generate hfiles instead of operate against the online table."), - HIVE_HBASE_SNAPSHOT_NAME("hive.hbase.snapshot.name", null, "The HBase table snapshot name to use."), - HIVE_HBASE_SNAPSHOT_RESTORE_DIR("hive.hbase.snapshot.restoredir", "/tmp", "The directory in which to " + - "restore the HBase table snapshot."), - - // For har files - HIVEARCHIVEENABLED("hive.archive.enabled", false, "Whether archiving operations are permitted"), - - HIVEOPTGBYUSINGINDEX("hive.optimize.index.groupby", false, - "Whether to enable optimization of group-by queries using Aggregate indexes."), - - HIVEFETCHTASKCONVERSION("hive.fetch.task.conversion", "more", new StringSet("none", "minimal", "more"), - "Some select queries can be converted to single FETCH task minimizing latency.\n" + - "Currently the query should be single sourced not having any subquery and should not have\n" + - "any aggregations or distincts (which incurs RS), lateral views and joins.\n" + - "0. none : disable hive.fetch.task.conversion\n" + - "1. minimal : SELECT STAR, FILTER on partition columns, LIMIT only\n" + - "2. more : SELECT, FILTER, LIMIT only (support TABLESAMPLE and virtual columns)" - ), - HIVEFETCHTASKCONVERSIONTHRESHOLD("hive.fetch.task.conversion.threshold", 1073741824L, - "Input threshold for applying hive.fetch.task.conversion. If target table is native, input length\n" + - "is calculated by summation of file lengths. If it's not native, storage handler for the table\n" + - "can optionally implement org.apache.hadoop.hive.ql.metadata.InputEstimator interface."), - - HIVEFETCHTASKAGGR("hive.fetch.task.aggr", false, - "Aggregation queries with no group-by clause (for example, select count(*) from src) execute\n" + - "final aggregations in single reduce task. If this is set true, Hive delegates final aggregation\n" + - "stage to fetch task, possibly decreasing the query time."), - - HIVEOPTIMIZEMETADATAQUERIES("hive.compute.query.using.stats", true, - "When set to true Hive will answer a few queries like count(1) purely using stats\n" + - "stored in metastore. 
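hive.fetch.task.conversion.threshold defaults to 1073741824 bytes (1 GiB) and is compared against the summed file lengths of a native table; the check below is a sketch of that comparison, with the eligibility rule simplified to the mode and threshold alone:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class FetchTaskConversionCheck {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        String mode = conf.getVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSION);                  // none/minimal/more
        long threshold = conf.getLongVar(HiveConf.ConfVars.HIVEFETCHTASKCONVERSIONTHRESHOLD);  // 1 GiB default
        long[] fileLengths = {300L << 20, 500L << 20};   // two files: 300 MB + 500 MB
        long inputLength = 0;
        for (long len : fileLengths) {
          inputLength += len;
        }
        boolean eligible = !"none".equals(mode) && inputLength <= threshold;
        System.out.println("input=" + inputLength + " bytes, fetch-task eligible=" + eligible);
      }
    }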
For basic stats collection turn on the config hive.stats.autogather to true.\n" + - "For more advanced stats collection need to run analyze table queries."), - - // Serde for FetchTask - HIVEFETCHOUTPUTSERDE("hive.fetch.output.serde", "org.apache.hadoop.hive.serde2.DelimitedJSONSerDe", - "The SerDe used by FetchTask to serialize the fetch output."), - - HIVEEXPREVALUATIONCACHE("hive.cache.expr.evaluation", true, - "If true, the evaluation result of a deterministic expression referenced twice or more\n" + - "will be cached.\n" + - "For example, in a filter condition like '.. where key + 10 = 100 or key + 10 = 0'\n" + - "the expression 'key + 10' will be evaluated/cached once and reused for the following\n" + - "expression ('key + 10 = 0'). Currently, this is applied only to expressions in select\n" + - "or filter operators."), - - // Hive Variables - HIVEVARIABLESUBSTITUTE("hive.variable.substitute", true, - "This enables substitution using syntax like ${var} ${system:var} and ${env:var}."), - HIVEVARIABLESUBSTITUTEDEPTH("hive.variable.substitute.depth", 40, - "The maximum replacements the substitution engine will do."), - - HIVECONFVALIDATION("hive.conf.validation", true, - "Enables type checking for registered Hive configurations"), - - SEMANTIC_ANALYZER_HOOK("hive.semantic.analyzer.hook", "", ""), - HIVE_TEST_AUTHORIZATION_SQLSTD_HS2_MODE( - "hive.test.authz.sstd.hs2.mode", false, "test hs2 mode from .q tests", true), - HIVE_AUTHORIZATION_ENABLED("hive.security.authorization.enabled", false, - "enable or disable the Hive client authorization"), - HIVE_AUTHORIZATION_MANAGER("hive.security.authorization.manager", - "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory", - "The Hive client authorization manager class name. The user defined authorization class should implement \n" + - "interface org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider."), - HIVE_AUTHENTICATOR_MANAGER("hive.security.authenticator.manager", - "org.apache.hadoop.hive.ql.security.HadoopDefaultAuthenticator", - "hive client authenticator manager class name. The user defined authenticator should implement \n" + - "interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider."), - HIVE_METASTORE_AUTHORIZATION_MANAGER("hive.security.metastore.authorization.manager", - "org.apache.hadoop.hive.ql.security.authorization.DefaultHiveMetastoreAuthorizationProvider", - "Names of authorization manager classes (comma separated) to be used in the metastore\n" + - "for authorization. The user defined authorization class should implement interface\n" + - "org.apache.hadoop.hive.ql.security.authorization.HiveMetastoreAuthorizationProvider.\n" + - "All authorization manager classes have to successfully authorize the metastore API\n" + - "call for the command execution to be allowed."), - HIVE_METASTORE_AUTHORIZATION_AUTH_READS("hive.security.metastore.authorization.auth.reads", true, - "If this is true, metastore authorizer authorizes read actions on database, table"), - HIVE_METASTORE_AUTHENTICATOR_MANAGER("hive.security.metastore.authenticator.manager", - "org.apache.hadoop.hive.ql.security.HadoopDefaultMetastoreAuthenticator", - "authenticator manager class name to be used in the metastore for authentication. 
\n" + - "The user defined authenticator should implement interface org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider."), - HIVE_AUTHORIZATION_TABLE_USER_GRANTS("hive.security.authorization.createtable.user.grants", "", - "the privileges automatically granted to some users whenever a table gets created.\n" + - "An example like \"userX,userY:select;userZ:create\" will grant select privilege to userX and userY,\n" + - "and grant create privilege to userZ whenever a new table created."), - HIVE_AUTHORIZATION_TABLE_GROUP_GRANTS("hive.security.authorization.createtable.group.grants", - "", - "the privileges automatically granted to some groups whenever a table gets created.\n" + - "An example like \"groupX,groupY:select;groupZ:create\" will grant select privilege to groupX and groupY,\n" + - "and grant create privilege to groupZ whenever a new table created."), - HIVE_AUTHORIZATION_TABLE_ROLE_GRANTS("hive.security.authorization.createtable.role.grants", "", - "the privileges automatically granted to some roles whenever a table gets created.\n" + - "An example like \"roleX,roleY:select;roleZ:create\" will grant select privilege to roleX and roleY,\n" + - "and grant create privilege to roleZ whenever a new table created."), - HIVE_AUTHORIZATION_TABLE_OWNER_GRANTS("hive.security.authorization.createtable.owner.grants", - "", - "The privileges automatically granted to the owner whenever a table gets created.\n" + - "An example like \"select,drop\" will grant select and drop privilege to the owner\n" + - "of the table. Note that the default gives the creator of a table no access to the\n" + - "table (but see HIVE-8067)."), - HIVE_AUTHORIZATION_TASK_FACTORY("hive.security.authorization.task.factory", - "org.apache.hadoop.hive.ql.parse.authorization.HiveAuthorizationTaskFactoryImpl", - "Authorization DDL task factory implementation"), - - // if this is not set default value is set during config initialization - // Default value can't be set in this constructor as it would refer names in other ConfVars - // whose constructor would not have been called - HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST( - "hive.security.authorization.sqlstd.confwhitelist", "", - "List of comma separated Java regexes. Configurations parameters that match these\n" + - "regexes can be modified by user when SQL standard authorization is enabled.\n" + - "To get the default value, use the 'set ' command.\n" + - "Note that the hive.conf.restricted.list checks are still enforced after the white list\n" + - "check"), - - HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST_APPEND( - "hive.security.authorization.sqlstd.confwhitelist.append", "", - "List of comma separated Java regexes, to be appended to list set in\n" + - "hive.security.authorization.sqlstd.confwhitelist. 
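The grant-list syntax is given in the examples above ("userX,userY:select;userZ:create"); the parser below is a hypothetical illustration of that format only, not Hive's grant machinery:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class CreateTableGrantsFormat {
      // "userX,userY:select;userZ:create" -> {userX=select, userY=select, userZ=create}
      static Map<String, String> parse(String spec) {
        Map<String, String> grants = new LinkedHashMap<>();
        for (String entry : spec.split(";")) {
          String[] parts = entry.split(":", 2);          // principals : privilege list
          for (String principal : parts[0].split(",")) {
            grants.put(principal.trim(), parts[1].trim());
          }
        }
        return grants;
      }

      public static void main(String[] args) {
        System.out.println(parse("userX,userY:select;userZ:create"));
      }
    }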
Using this list instead\n" + - "of updating the original list means that you can append to the defaults\n" + - "set by SQL standard authorization instead of replacing it entirely."), - - HIVE_CLI_PRINT_HEADER("hive.cli.print.header", false, "Whether to print the names of the columns in query output."), - - HIVE_CLI_TEZ_SESSION_ASYNC("hive.cli.tez.session.async", true, "Whether to start Tez\n" + - "session in background when running CLI with Tez, allowing CLI to be available earlier."), - - HIVE_ERROR_ON_EMPTY_PARTITION("hive.error.on.empty.partition", false, - "Whether to throw an exception if dynamic partition insert generates empty results."), - - HIVE_INDEX_COMPACT_FILE("hive.index.compact.file", "", "internal variable"), - HIVE_INDEX_BLOCKFILTER_FILE("hive.index.blockfilter.file", "", "internal variable"), - HIVE_INDEX_IGNORE_HDFS_LOC("hive.index.compact.file.ignore.hdfs", false, - "When true the HDFS location stored in the index file will be ignored at runtime.\n" + - "If the data got moved or the name of the cluster got changed, the index data should still be usable."), - - HIVE_EXIM_URI_SCHEME_WL("hive.exim.uri.scheme.whitelist", "hdfs,pfile,file,s3,s3a", - "A comma separated list of acceptable URI schemes for import and export."), - // temporary variable for testing. This is added just to turn off this feature in case of a bug in - // deployment. It has not been documented in hive-default.xml intentionally, this should be removed - // once the feature is stable - HIVE_EXIM_RESTRICT_IMPORTS_INTO_REPLICATED_TABLES("hive.exim.strict.repl.tables",true, - "Parameter that determines if 'regular' (non-replication) export dumps can be\n" + - "imported on to tables that are the target of replication. If this parameter is\n" + - "set, regular imports will check if the destination table(if it exists) has a " + - "'repl.last.id' set on it. If so, it will fail."), - HIVE_REPL_TASK_FACTORY("hive.repl.task.factory", - "org.apache.hive.hcatalog.api.repl.exim.EximReplicationTaskFactory", - "Parameter that can be used to override which ReplicationTaskFactory will be\n" + - "used to instantiate ReplicationTask events. Override for third party repl plugins"), - HIVE_MAPPER_CANNOT_SPAN_MULTIPLE_PARTITIONS("hive.mapper.cannot.span.multiple.partitions", false, ""), - HIVE_REWORK_MAPREDWORK("hive.rework.mapredwork", false, - "should rework the mapred work or not.\n" + - "This is first introduced by SymlinkTextInputFormat to replace symlink files with real paths at compile time."), - HIVE_CONCATENATE_CHECK_INDEX ("hive.exec.concatenate.check.index", true, - "If this is set to true, Hive will throw error when doing\n" + - "'alter table tbl_name [partSpec] concatenate' on a table/partition\n" + - "that has indexes on it. The reason the user want to set this to true\n" + - "is because it can help user to avoid handling all index drop, recreation,\n" + - "rebuild work. This is very helpful for tables with thousands of partitions."), - HIVE_IO_EXCEPTION_HANDLERS("hive.io.exception.handlers", "", - "A list of io exception handler class names. This is used\n" + - "to construct a list exception handlers to handle exceptions thrown\n" + - "by record readers"), - - // logging configuration - HIVE_LOG4J_FILE("hive.log4j.file", "", - "Hive log4j configuration file.\n" + - "If the property is not set, then logging will be initialized using hive-log4j2.properties found on the classpath.\n" + - "If the property is set, the value must be a valid URI (java.net.URI, e.g. 
\"file:///tmp/my-logging.xml\"), \n" + - "which you can then extract a URL from and pass to PropertyConfigurator.configure(URL)."), - HIVE_EXEC_LOG4J_FILE("hive.exec.log4j.file", "", - "Hive log4j configuration file for execution mode(sub command).\n" + - "If the property is not set, then logging will be initialized using hive-exec-log4j2.properties found on the classpath.\n" + - "If the property is set, the value must be a valid URI (java.net.URI, e.g. \"file:///tmp/my-logging.xml\"), \n" + - "which you can then extract a URL from and pass to PropertyConfigurator.configure(URL)."), - HIVE_ASYNC_LOG_ENABLED("hive.async.log.enabled", true, - "Whether to enable Log4j2's asynchronous logging. Asynchronous logging can give\n" + - " significant performance improvement as logging will be handled in separate thread\n" + - " that uses LMAX disruptor queue for buffering log messages.\n" + - " Refer https://logging.apache.org/log4j/2.x/manual/async.html for benefits and\n" + - " drawbacks."), - - HIVE_LOG_EXPLAIN_OUTPUT("hive.log.explain.output", false, - "Whether to log explain output for every query.\n" + - "When enabled, will log EXPLAIN EXTENDED output for the query at INFO log4j log level."), - HIVE_EXPLAIN_USER("hive.explain.user", true, - "Whether to show explain result at user level.\n" + - "When enabled, will log EXPLAIN output for the query at user level. Tez only."), - HIVE_SPARK_EXPLAIN_USER("hive.spark.explain.user", false, - "Whether to show explain result at user level.\n" + - "When enabled, will log EXPLAIN output for the query at user level. Spark only."), - - // prefix used to auto generated column aliases (this should be started with '_') - HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c", - "String used as a prefix when auto generating column alias.\n" + - "By default the prefix label will be appended with a column position number to form the column alias. \n" + - "Auto generation would happen if an aggregate function is used in a select clause without an explicit alias."), - HIVE_AUTOGEN_COLUMNALIAS_PREFIX_INCLUDEFUNCNAME( - "hive.autogen.columnalias.prefix.includefuncname", false, - "Whether to include function name in the column alias auto generated by Hive."), - HIVE_METRICS_CLASS("hive.service.metrics.class", - "org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics", - new StringSet( - "org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics", - "org.apache.hadoop.hive.common.metrics.LegacyMetrics"), - "Hive metrics subsystem implementation class."), - HIVE_CODAHALE_METRICS_REPORTER_CLASSES("hive.service.metrics.codahale.reporter.classes", - "org.apache.hadoop.hive.common.metrics.metrics2.JsonFileMetricsReporter, " + - "org.apache.hadoop.hive.common.metrics.metrics2.JmxMetricsReporter", - "Comma separated list of reporter implementation classes for metric class " - + "org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics. Overrides " - + "HIVE_METRICS_REPORTER conf if present"), - @Deprecated - HIVE_METRICS_REPORTER("hive.service.metrics.reporter", "", - "Reporter implementations for metric class " - + "org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics;" + - "Deprecated, use HIVE_CODAHALE_METRICS_REPORTER_CLASSES instead. This configuraiton will be" - + " overridden by HIVE_CODAHALE_METRICS_REPORTER_CLASSES if present. 
" + - "Comma separated list of JMX, CONSOLE, JSON_FILE, HADOOP2"), - HIVE_METRICS_JSON_FILE_LOCATION("hive.service.metrics.file.location", "/tmp/report.json", - "For metric class org.apache.hadoop.hive.common.metrics.metrics2.CodahaleMetrics JSON_FILE reporter, the location of local JSON metrics file. " + - "This file will get overwritten at every interval."), - HIVE_METRICS_JSON_FILE_INTERVAL("hive.service.metrics.file.frequency", "5000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "For metric class org.apache.hadoop.hive.common.metrics.metrics2.JsonFileMetricsReporter, " + - "the frequency of updating JSON metrics file."), - HIVE_METRICS_HADOOP2_INTERVAL("hive.service.metrics.hadoop2.frequency", "30s", - new TimeValidator(TimeUnit.SECONDS), - "For metric class org.apache.hadoop.hive.common.metrics.metrics2.Metrics2Reporter, " + - "the frequency of updating the HADOOP2 metrics system."), - HIVE_METRICS_HADOOP2_COMPONENT_NAME("hive.service.metrics.hadoop2.component", - "hive", - "Component name to provide to Hadoop2 Metrics system. Ideally 'hivemetastore' for the MetaStore " + - " and and 'hiveserver2' for HiveServer2." - ), - HIVE_PERF_LOGGER("hive.exec.perf.logger", "org.apache.hadoop.hive.ql.log.PerfLogger", - "The class responsible for logging client side performance metrics. \n" + - "Must be a subclass of org.apache.hadoop.hive.ql.log.PerfLogger"), - HIVE_START_CLEANUP_SCRATCHDIR("hive.start.cleanup.scratchdir", false, - "To cleanup the Hive scratchdir when starting the Hive Server"), - HIVE_SCRATCH_DIR_LOCK("hive.scratchdir.lock", false, - "To hold a lock file in scratchdir to prevent to be removed by cleardanglingscratchdir"), - HIVE_INSERT_INTO_MULTILEVEL_DIRS("hive.insert.into.multilevel.dirs", false, - "Where to insert into multilevel directories like\n" + - "\"insert directory '/HIVEFT25686/chinna/' from table\""), - HIVE_INSERT_INTO_EXTERNAL_TABLES("hive.insert.into.external.tables", true, - "whether insert into external tables is allowed"), - HIVE_TEMPORARY_TABLE_STORAGE( - "hive.exec.temporary.table.storage", "default", new StringSet("memory", - "ssd", "default"), "Define the storage policy for temporary tables." + - "Choices between memory, ssd and default"), - HIVE_QUERY_LIFETIME_HOOKS("hive.query.lifetime.hooks", "", - "A comma separated list of hooks which implement QueryLifeTimeHook. These will be triggered" + - " before/after query compilation and before/after query execution, in the order specified." + - "Implementations of QueryLifeTimeHookWithParseHooks can also be specified in this list. If they are" + - "specified then they will be invoked in the same places as QueryLifeTimeHooks and will be invoked during pre " + - "and post query parsing"), - HIVE_DRIVER_RUN_HOOKS("hive.exec.driver.run.hooks", "", - "A comma separated list of hooks which implement HiveDriverRunHook. Will be run at the beginning " + - "and end of Driver.run, these will be run in the order specified."), - HIVE_DDL_OUTPUT_FORMAT("hive.ddl.output.format", null, - "The data format to use for DDL output. One of \"text\" (for human\n" + - "readable text) or \"json\" (for a json object)."), - HIVE_ENTITY_SEPARATOR("hive.entity.separator", "@", - "Separator used to construct names of tables and partitions. 
For example, dbname@tablename@partitionname"), - HIVE_CAPTURE_TRANSFORM_ENTITY("hive.entity.capture.transform", false, - "Compiler to capture transform URI referred in the query"), - HIVE_DISPLAY_PARTITION_COLUMNS_SEPARATELY("hive.display.partition.cols.separately", true, - "In older Hive version (0.10 and earlier) no distinction was made between\n" + - "partition columns or non-partition columns while displaying columns in describe\n" + - "table. From 0.12 onwards, they are displayed separately. This flag will let you\n" + - "get old behavior, if desired. See, test-case in patch for HIVE-6689."), - - HIVE_SSL_PROTOCOL_BLACKLIST("hive.ssl.protocol.blacklist", "SSLv2,SSLv3", - "SSL Versions to disable for all Hive Servers"), - - // HiveServer2 specific configs - HIVE_SERVER2_CLEAR_DANGLING_SCRATCH_DIR("hive.server2.clear.dangling.scratchdir", false, - "Clear dangling scratch dir periodically in HS2"), - HIVE_SERVER2_CLEAR_DANGLING_SCRATCH_DIR_INTERVAL("hive.server2.clear.dangling.scratchdir.interval", - "1800s", new TimeValidator(TimeUnit.SECONDS), - "Interval to clear dangling scratch dir periodically in HS2"), - HIVE_SERVER2_SLEEP_INTERVAL_BETWEEN_START_ATTEMPTS("hive.server2.sleep.interval.between.start.attempts", - "60s", new TimeValidator(TimeUnit.MILLISECONDS, 0l, true, Long.MAX_VALUE, true), - "Amount of time to sleep between HiveServer2 start attempts. Primarily meant for tests"), - HIVE_SERVER2_MAX_START_ATTEMPTS("hive.server2.max.start.attempts", 30L, new RangeValidator(0L, null), - "Number of times HiveServer2 will attempt to start before exiting. The sleep interval between retries" + - " is determined by " + ConfVars.HIVE_SERVER2_SLEEP_INTERVAL_BETWEEN_START_ATTEMPTS.varname + - "\n The default of 30 will keep trying for 30 minutes."), - HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY("hive.server2.support.dynamic.service.discovery", false, - "Whether HiveServer2 supports dynamic service discovery for its clients. " + - "To support this, each instance of HiveServer2 currently uses ZooKeeper to register itself, " + - "when it is brought up. JDBC/ODBC clients should use the ZooKeeper ensemble: " + - "hive.zookeeper.quorum in their connection string."), - HIVE_SERVER2_ZOOKEEPER_NAMESPACE("hive.server2.zookeeper.namespace", "hiveserver2", - "The parent node in ZooKeeper used by HiveServer2 when supporting dynamic service discovery."), - HIVE_SERVER2_ZOOKEEPER_PUBLISH_CONFIGS("hive.server2.zookeeper.publish.configs", true, - "Whether we should publish HiveServer2's configs to ZooKeeper."), - - // HiveServer2 global init file location - HIVE_SERVER2_GLOBAL_INIT_FILE_LOCATION("hive.server2.global.init.file.location", "${env:HIVE_CONF_DIR}", - "Either the location of a HS2 global init file or a directory containing a .hiverc file. If the \n" + - "property is set, the value must be a valid path to an init file or directory where the init file is located."), - HIVE_SERVER2_TRANSPORT_MODE("hive.server2.transport.mode", "binary", new StringSet("binary", "http"), - "Transport mode of HiveServer2."), - HIVE_SERVER2_THRIFT_BIND_HOST("hive.server2.thrift.bind.host", "", - "Bind host on which to run the HiveServer2 Thrift service."), - HIVE_SERVER2_PARALLEL_COMPILATION("hive.driver.parallel.compilation", false, "Whether to\n" + - "enable parallel compilation of the queries between sessions and within the same session on HiveServer2. 
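A sketch of the service-discovery settings described above; the quorum hosts are placeholders, and clients resolve a HiveServer2 instance through the same hive.zookeeper.quorum ensemble:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class Hs2DiscoverySetup {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        conf.setBoolVar(HiveConf.ConfVars.HIVE_SERVER2_SUPPORT_DYNAMIC_SERVICE_DISCOVERY, true);
        conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_NAMESPACE, "hiveserver2");
        conf.setVar(HiveConf.ConfVars.HIVE_ZOOKEEPER_QUORUM,
            "zk1.example.com,zk2.example.com,zk3.example.com");
        System.out.println("HS2 instances register under /"
            + conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_ZOOKEEPER_NAMESPACE));
      }
    }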
The default is false."), - HIVE_SERVER2_COMPILE_LOCK_TIMEOUT("hive.server2.compile.lock.timeout", "0s", - new TimeValidator(TimeUnit.SECONDS), - "Number of seconds a request will wait to acquire the compile lock before giving up. " + - "Setting it to 0s disables the timeout."), - HIVE_SERVER2_PARALLEL_OPS_IN_SESSION("hive.server2.parallel.ops.in.session", true, - "Whether to allow several parallel operations (such as SQL statements) in one session."), - - // HiveServer2 WebUI - HIVE_SERVER2_WEBUI_BIND_HOST("hive.server2.webui.host", "0.0.0.0", "The host address the HiveServer2 WebUI will listen on"), - HIVE_SERVER2_WEBUI_PORT("hive.server2.webui.port", 10002, "The port the HiveServer2 WebUI will listen on. This can be" - + "set to 0 or a negative integer to disable the web UI"), - HIVE_SERVER2_WEBUI_MAX_THREADS("hive.server2.webui.max.threads", 50, "The max HiveServer2 WebUI threads"), - HIVE_SERVER2_WEBUI_USE_SSL("hive.server2.webui.use.ssl", false, - "Set this to true for using SSL encryption for HiveServer2 WebUI."), - HIVE_SERVER2_WEBUI_SSL_KEYSTORE_PATH("hive.server2.webui.keystore.path", "", - "SSL certificate keystore location for HiveServer2 WebUI."), - HIVE_SERVER2_WEBUI_SSL_KEYSTORE_PASSWORD("hive.server2.webui.keystore.password", "", - "SSL certificate keystore password for HiveServer2 WebUI."), - HIVE_SERVER2_WEBUI_USE_SPNEGO("hive.server2.webui.use.spnego", false, - "If true, the HiveServer2 WebUI will be secured with SPNEGO. Clients must authenticate with Kerberos."), - HIVE_SERVER2_WEBUI_SPNEGO_KEYTAB("hive.server2.webui.spnego.keytab", "", - "The path to the Kerberos Keytab file containing the HiveServer2 WebUI SPNEGO service principal."), - HIVE_SERVER2_WEBUI_SPNEGO_PRINCIPAL("hive.server2.webui.spnego.principal", - "HTTP/_HOST@EXAMPLE.COM", "The HiveServer2 WebUI SPNEGO service principal.\n" + - "The special string _HOST will be replaced automatically with \n" + - "the value of hive.server2.webui.host or the correct host name."), - HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES("hive.server2.webui.max.historic.queries", 25, - "The maximum number of past queries to show in HiverSever2 WebUI."), - - // Tez session settings - HIVE_SERVER2_TEZ_DEFAULT_QUEUES("hive.server2.tez.default.queues", "", - "A list of comma separated values corresponding to YARN queues of the same name.\n" + - "When HiveServer2 is launched in Tez mode, this configuration needs to be set\n" + - "for multiple Tez sessions to run in parallel on the cluster."), - HIVE_SERVER2_TEZ_SESSIONS_PER_DEFAULT_QUEUE("hive.server2.tez.sessions.per.default.queue", 1, - "A positive integer that determines the number of Tez sessions that should be\n" + - "launched on each of the queues specified by \"hive.server2.tez.default.queues\".\n" + - "Determines the parallelism on each queue."), - HIVE_SERVER2_TEZ_INITIALIZE_DEFAULT_SESSIONS("hive.server2.tez.initialize.default.sessions", - false, - "This flag is used in HiveServer2 to enable a user to use HiveServer2 without\n" + - "turning on Tez for HiveServer2. 
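The Tez session pool size follows directly from the two settings above (number of queues times sessions per queue); a small worked example with assumed values:

    public class TezSessionPoolSizing {
      public static void main(String[] args) {
        String[] queues = "etl,adhoc".split(",");   // hive.server2.tez.default.queues
        int sessionsPerQueue = 3;                   // hive.server2.tez.sessions.per.default.queue
        int pooledSessions = queues.length * sessionsPerQueue;
        System.out.println(pooledSessions + " Tez sessions across " + queues.length + " queues");   // 6
      }
    }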
The user could potentially want to run queries\n" + - "over Tez without the pool of sessions."), - HIVE_SERVER2_TEZ_SESSION_LIFETIME("hive.server2.tez.session.lifetime", "162h", - new TimeValidator(TimeUnit.HOURS), - "The lifetime of the Tez sessions launched by HS2 when default sessions are enabled.\n" + - "Set to 0 to disable session expiration."), - HIVE_SERVER2_TEZ_SESSION_LIFETIME_JITTER("hive.server2.tez.session.lifetime.jitter", "3h", - new TimeValidator(TimeUnit.HOURS), - "The jitter for Tez session lifetime; prevents all the sessions from restarting at once."), - HIVE_SERVER2_TEZ_SESSION_MAX_INIT_THREADS("hive.server2.tez.sessions.init.threads", 16, - "If hive.server2.tez.initialize.default.sessions is enabled, the maximum number of\n" + - "threads to use to initialize the default sessions."), - HIVE_SERVER2_TEZ_SESSION_RESTRICTED_CONFIGS("hive.server2.tez.sessions.restricted.configs", "", - "The configuration settings that cannot be set when submitting jobs to HiveServer2. If\n" + - "any of these are set to values different from those in the server configuration, an\n" + - "exception will be thrown."), - HIVE_SERVER2_TEZ_SESSION_CUSTOM_QUEUE_ALLOWED("hive.server2.tez.sessions.custom.queue.allowed", - "true", new StringSet("true", "false", "ignore"), - "Whether Tez session pool should allow submitting queries to custom queues. The options\n" + - "are true, false (error out), ignore (accept the query but ignore the queue setting)."), - - // Operation log configuration - HIVE_SERVER2_LOGGING_OPERATION_ENABLED("hive.server2.logging.operation.enabled", true, - "When true, HS2 will save operation logs and make them available for clients"), - HIVE_SERVER2_LOGGING_OPERATION_LOG_LOCATION("hive.server2.logging.operation.log.location", - "${system:java.io.tmpdir}" + File.separator + "${system:user.name}" + File.separator + - "operation_logs", - "Top level directory where operation logs are stored if logging functionality is enabled"), - HIVE_SERVER2_LOGGING_OPERATION_LEVEL("hive.server2.logging.operation.level", "EXECUTION", - new StringSet("NONE", "EXECUTION", "PERFORMANCE", "VERBOSE"), - "HS2 operation logging mode available to clients to be set at session level.\n" + - "For this to work, hive.server2.logging.operation.enabled should be set to true.\n" + - " NONE: Ignore any logging\n" + - " EXECUTION: Log completion of tasks\n" + - " PERFORMANCE: Execution + Performance logs \n" + - " VERBOSE: All logs" ), - - // Enable metric collection for HiveServer2 - HIVE_SERVER2_METRICS_ENABLED("hive.server2.metrics.enabled", false, "Enable metrics on the HiveServer2."), - - // http (over thrift) transport settings - HIVE_SERVER2_THRIFT_HTTP_PORT("hive.server2.thrift.http.port", 10001, - "Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'http'."), - HIVE_SERVER2_THRIFT_HTTP_PATH("hive.server2.thrift.http.path", "cliservice", - "Path component of URL endpoint when in HTTP mode."), - HIVE_SERVER2_THRIFT_MAX_MESSAGE_SIZE("hive.server2.thrift.max.message.size", 100*1024*1024, - "Maximum message size in bytes a HS2 server will accept."), - HIVE_SERVER2_THRIFT_HTTP_MAX_IDLE_TIME("hive.server2.thrift.http.max.idle.time", "1800s", - new TimeValidator(TimeUnit.MILLISECONDS), - "Maximum idle time for a connection on the server when in HTTP mode."), - HIVE_SERVER2_THRIFT_HTTP_WORKER_KEEPALIVE_TIME("hive.server2.thrift.http.worker.keepalive.time", "60s", - new TimeValidator(TimeUnit.SECONDS), - "Keepalive time for an idle http worker thread. 
When the number of workers exceeds min workers, " + - "excessive threads are killed after this time interval."), - HIVE_SERVER2_THRIFT_HTTP_REQUEST_HEADER_SIZE("hive.server2.thrift.http.request.header.size", 6*1024, - "Request header size in bytes, when using HTTP transport mode. Jetty defaults used."), - HIVE_SERVER2_THRIFT_HTTP_RESPONSE_HEADER_SIZE("hive.server2.thrift.http.response.header.size", 6*1024, - "Response header size in bytes, when using HTTP transport mode. Jetty defaults used."), - - // Cookie based authentication when using HTTP Transport - HIVE_SERVER2_THRIFT_HTTP_COOKIE_AUTH_ENABLED("hive.server2.thrift.http.cookie.auth.enabled", true, - "When true, HiveServer2 in HTTP transport mode, will use cookie based authentication mechanism."), - HIVE_SERVER2_THRIFT_HTTP_COOKIE_MAX_AGE("hive.server2.thrift.http.cookie.max.age", "86400s", - new TimeValidator(TimeUnit.SECONDS), - "Maximum age in seconds for server side cookie used by HS2 in HTTP mode."), - HIVE_SERVER2_THRIFT_HTTP_COOKIE_DOMAIN("hive.server2.thrift.http.cookie.domain", null, - "Domain for the HS2 generated cookies"), - HIVE_SERVER2_THRIFT_HTTP_COOKIE_PATH("hive.server2.thrift.http.cookie.path", null, - "Path for the HS2 generated cookies"), - @Deprecated - HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_SECURE("hive.server2.thrift.http.cookie.is.secure", true, - "Deprecated: Secure attribute of the HS2 generated cookie (this is automatically enabled for SSL enabled HiveServer2)."), - HIVE_SERVER2_THRIFT_HTTP_COOKIE_IS_HTTPONLY("hive.server2.thrift.http.cookie.is.httponly", true, - "HttpOnly attribute of the HS2 generated cookie."), - - // binary transport settings - HIVE_SERVER2_THRIFT_PORT("hive.server2.thrift.port", 10000, - "Port number of HiveServer2 Thrift interface when hive.server2.transport.mode is 'binary'."), - HIVE_SERVER2_THRIFT_SASL_QOP("hive.server2.thrift.sasl.qop", "auth", - new StringSet("auth", "auth-int", "auth-conf"), - "Sasl QOP value; set it to one of following values to enable higher levels of\n" + - "protection for HiveServer2 communication with clients.\n" + - "Setting hadoop.rpc.protection to a higher level than HiveServer2 does not\n" + - "make sense in most situations. HiveServer2 ignores hadoop.rpc.protection in favor\n" + - "of hive.server2.thrift.sasl.qop.\n" + - " \"auth\" - authentication only (default)\n" + - " \"auth-int\" - authentication plus integrity protection\n" + - " \"auth-conf\" - authentication plus integrity and confidentiality protection\n" + - "This is applicable only if HiveServer2 is configured to use Kerberos authentication."), - HIVE_SERVER2_THRIFT_MIN_WORKER_THREADS("hive.server2.thrift.min.worker.threads", 5, - "Minimum number of Thrift worker threads"), - HIVE_SERVER2_THRIFT_MAX_WORKER_THREADS("hive.server2.thrift.max.worker.threads", 500, - "Maximum number of Thrift worker threads"), - HIVE_SERVER2_THRIFT_LOGIN_BEBACKOFF_SLOT_LENGTH( - "hive.server2.thrift.exponential.backoff.slot.length", "100ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Binary exponential backoff slot time for Thrift clients during login to HiveServer2,\n" + - "for retries until hitting Thrift client timeout"), - HIVE_SERVER2_THRIFT_LOGIN_TIMEOUT("hive.server2.thrift.login.timeout", "20s", - new TimeValidator(TimeUnit.SECONDS), "Timeout for Thrift clients during login to HiveServer2"), - HIVE_SERVER2_THRIFT_WORKER_KEEPALIVE_TIME("hive.server2.thrift.worker.keepalive.time", "60s", - new TimeValidator(TimeUnit.SECONDS), - "Keepalive time (in seconds) for an idle worker thread. 
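A one-line sketch of raising the Thrift SASL protection level described above; it is only meaningful when HiveServer2 authenticates with Kerberos, and HiveServer2 ignores hadoop.rpc.protection in favor of this setting:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class SaslQopSetup {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // auth < auth-int < auth-conf (authentication, +integrity, +confidentiality)
        conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_SASL_QOP, "auth-conf");
        System.out.println("sasl.qop = " + conf.getVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_SASL_QOP));
      }
    }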
When the number of workers exceeds min workers, " + - "excessive threads are killed after this time interval."), - - // Configuration for async thread pool in SessionManager - HIVE_SERVER2_ASYNC_EXEC_THREADS("hive.server2.async.exec.threads", 100, - "Number of threads in the async thread pool for HiveServer2"), - HIVE_SERVER2_ASYNC_EXEC_SHUTDOWN_TIMEOUT("hive.server2.async.exec.shutdown.timeout", "10s", - new TimeValidator(TimeUnit.SECONDS), - "How long HiveServer2 shutdown will wait for async threads to terminate."), - HIVE_SERVER2_ASYNC_EXEC_WAIT_QUEUE_SIZE("hive.server2.async.exec.wait.queue.size", 100, - "Size of the wait queue for async thread pool in HiveServer2.\n" + - "After hitting this limit, the async thread pool will reject new requests."), - HIVE_SERVER2_ASYNC_EXEC_KEEPALIVE_TIME("hive.server2.async.exec.keepalive.time", "10s", - new TimeValidator(TimeUnit.SECONDS), - "Time that an idle HiveServer2 async thread (from the thread pool) will wait for a new task\n" + - "to arrive before terminating"), - HIVE_SERVER2_ASYNC_EXEC_ASYNC_COMPILE("hive.server2.async.exec.async.compile", false, - "Whether to enable compiling async query asynchronously. If enabled, it is unknown if the query will have any resultset before compilation completed."), - HIVE_SERVER2_LONG_POLLING_TIMEOUT("hive.server2.long.polling.timeout", "5000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Time that HiveServer2 will wait before responding to asynchronous calls that use long polling"), - - HIVE_SESSION_IMPL_CLASSNAME("hive.session.impl.classname", null, "Classname for custom implementation of hive session"), - HIVE_SESSION_IMPL_WITH_UGI_CLASSNAME("hive.session.impl.withugi.classname", null, "Classname for custom implementation of hive session with UGI"), - - // HiveServer2 auth configuration - HIVE_SERVER2_AUTHENTICATION("hive.server2.authentication", "NONE", - new StringSet("NOSASL", "NONE", "LDAP", "KERBEROS", "PAM", "CUSTOM"), - "Client authentication types.\n" + - " NONE: no authentication check\n" + - " LDAP: LDAP/AD based authentication\n" + - " KERBEROS: Kerberos/GSSAPI authentication\n" + - " CUSTOM: Custom authentication provider\n" + - " (Use with property hive.server2.custom.authentication.class)\n" + - " PAM: Pluggable authentication module\n" + - " NOSASL: Raw transport"), - HIVE_SERVER2_ALLOW_USER_SUBSTITUTION("hive.server2.allow.user.substitution", true, - "Allow alternate user to be specified as part of HiveServer2 open connection request."), - HIVE_SERVER2_KERBEROS_KEYTAB("hive.server2.authentication.kerberos.keytab", "", - "Kerberos keytab file for server principal"), - HIVE_SERVER2_KERBEROS_PRINCIPAL("hive.server2.authentication.kerberos.principal", "", - "Kerberos server principal"), - HIVE_SERVER2_SPNEGO_KEYTAB("hive.server2.authentication.spnego.keytab", "", - "keytab file for SPNego principal, optional,\n" + - "typical value would look like /etc/security/keytabs/spnego.service.keytab,\n" + - "This keytab would be used by HiveServer2 when Kerberos security is enabled and \n" + - "HTTP transport mode is used.\n" + - "This needs to be set only if SPNEGO is to be used in authentication.\n" + - "SPNego authentication would be honored only if valid\n" + - " hive.server2.authentication.spnego.principal\n" + - "and\n" + - " hive.server2.authentication.spnego.keytab\n" + - "are specified."), - HIVE_SERVER2_SPNEGO_PRINCIPAL("hive.server2.authentication.spnego.principal", "", - "SPNego service principal, optional,\n" + - "typical value would look like HTTP/_HOST@EXAMPLE.COM\n" + - "SPNego 
service principal would be used by HiveServer2 when Kerberos security is enabled\n" + - "and HTTP transport mode is used.\n" + - "This needs to be set only if SPNEGO is to be used in authentication."), - HIVE_SERVER2_PLAIN_LDAP_URL("hive.server2.authentication.ldap.url", null, - "LDAP connection URL(s),\n" + - "this value could contain URLs to mutiple LDAP servers instances for HA,\n" + - "each LDAP URL is separated by a SPACE character. URLs are used in the \n" + - " order specified until a connection is successful."), - HIVE_SERVER2_PLAIN_LDAP_BASEDN("hive.server2.authentication.ldap.baseDN", null, "LDAP base DN"), - HIVE_SERVER2_PLAIN_LDAP_DOMAIN("hive.server2.authentication.ldap.Domain", null, ""), - HIVE_SERVER2_PLAIN_LDAP_GROUPDNPATTERN("hive.server2.authentication.ldap.groupDNPattern", null, - "COLON-separated list of patterns to use to find DNs for group entities in this directory.\n" + - "Use %s where the actual group name is to be substituted for.\n" + - "For example: CN=%s,CN=Groups,DC=subdomain,DC=domain,DC=com."), - HIVE_SERVER2_PLAIN_LDAP_GROUPFILTER("hive.server2.authentication.ldap.groupFilter", null, - "COMMA-separated list of LDAP Group names (short name not full DNs).\n" + - "For example: HiveAdmins,HadoopAdmins,Administrators"), - HIVE_SERVER2_PLAIN_LDAP_USERDNPATTERN("hive.server2.authentication.ldap.userDNPattern", null, - "COLON-separated list of patterns to use to find DNs for users in this directory.\n" + - "Use %s where the actual group name is to be substituted for.\n" + - "For example: CN=%s,CN=Users,DC=subdomain,DC=domain,DC=com."), - HIVE_SERVER2_PLAIN_LDAP_USERFILTER("hive.server2.authentication.ldap.userFilter", null, - "COMMA-separated list of LDAP usernames (just short names, not full DNs).\n" + - "For example: hiveuser,impalauser,hiveadmin,hadoopadmin"), - HIVE_SERVER2_PLAIN_LDAP_GUIDKEY("hive.server2.authentication.ldap.guidKey", "uid", - "LDAP attribute name whose values are unique in this LDAP server.\n" + - "For example: uid or CN."), - HIVE_SERVER2_PLAIN_LDAP_GROUPMEMBERSHIP_KEY("hive.server2.authentication.ldap.groupMembershipKey", "member", - "LDAP attribute name on the group object that contains the list of distinguished names\n" + - "for the user, group, and contact objects that are members of the group.\n" + - "For example: member, uniqueMember or memberUid"), - HIVE_SERVER2_PLAIN_LDAP_USERMEMBERSHIP_KEY(HIVE_SERVER2_AUTHENTICATION_LDAP_USERMEMBERSHIPKEY_NAME, null, - "LDAP attribute name on the user object that contains groups of which the user is\n" + - "a direct member, except for the primary group, which is represented by the\n" + - "primaryGroupId.\n" + - "For example: memberOf"), - HIVE_SERVER2_PLAIN_LDAP_GROUPCLASS_KEY("hive.server2.authentication.ldap.groupClassKey", "groupOfNames", - "LDAP attribute name on the group entry that is to be used in LDAP group searches.\n" + - "For example: group, groupOfNames or groupOfUniqueNames."), - HIVE_SERVER2_PLAIN_LDAP_CUSTOMLDAPQUERY("hive.server2.authentication.ldap.customLDAPQuery", null, - "A full LDAP query that LDAP Atn provider uses to execute against LDAP Server.\n" + - "If this query returns a null resultset, the LDAP Provider fails the Authentication\n" + - "request, succeeds if the user is part of the resultset." 
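// Illustrative sketch, not part of this patch: wiring up the LDAP authentication options
// described above. Assumes the HiveConf string setters; the class name, host names, and
// DNs are placeholders, not values from this change.
import org.apache.hadoop.hive.conf.HiveConf;

public class LdapAuthConfigExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_AUTHENTICATION, "LDAP");
    // Multiple URLs may be listed, separated by spaces, and are tried in order.
    conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_PLAIN_LDAP_URL,
        "ldap://ldap1.example.com ldap://ldap2.example.com");
    conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_PLAIN_LDAP_BASEDN, "DC=example,DC=com");
    conf.setVar(HiveConf.ConfVars.HIVE_SERVER2_PLAIN_LDAP_GROUPFILTER, "HiveAdmins,HadoopAdmins");
  }
}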
+ - "For example: (&(objectClass=group)(objectClass=top)(instanceType=4)(cn=Domain*)) \n" + - "(&(objectClass=person)(|(sAMAccountName=admin)(|(memberOf=CN=Domain Admins,CN=Users,DC=domain,DC=com)" + - "(memberOf=CN=Administrators,CN=Builtin,DC=domain,DC=com))))"), - HIVE_SERVER2_CUSTOM_AUTHENTICATION_CLASS("hive.server2.custom.authentication.class", null, - "Custom authentication class. Used when property\n" + - "'hive.server2.authentication' is set to 'CUSTOM'. Provided class\n" + - "must be a proper implementation of the interface\n" + - "org.apache.hive.service.auth.PasswdAuthenticationProvider. HiveServer2\n" + - "will call its Authenticate(user, passed) method to authenticate requests.\n" + - "The implementation may optionally implement Hadoop's\n" + - "org.apache.hadoop.conf.Configurable class to grab Hive's Configuration object."), - HIVE_SERVER2_PAM_SERVICES("hive.server2.authentication.pam.services", null, - "List of the underlying pam services that should be used when auth type is PAM\n" + - "A file with the same name must exist in /etc/pam.d"), - - HIVE_SERVER2_ENABLE_DOAS("hive.server2.enable.doAs", true, - "Setting this property to true will have HiveServer2 execute\n" + - "Hive operations as the user making the calls to it."), - HIVE_DISTCP_DOAS_USER("hive.distcp.privileged.doAs","hdfs", - "This property allows privileged distcp executions done by hive\n" + - "to run as this user. Typically, it should be the user you\n" + - "run the namenode as, such as the 'hdfs' user."), - HIVE_SERVER2_TABLE_TYPE_MAPPING("hive.server2.table.type.mapping", "CLASSIC", new StringSet("CLASSIC", "HIVE"), - "This setting reflects how HiveServer2 will report the table types for JDBC and other\n" + - "client implementations that retrieve the available tables and supported table types\n" + - " HIVE : Exposes Hive's native table types like MANAGED_TABLE, EXTERNAL_TABLE, VIRTUAL_VIEW\n" + - " CLASSIC : More generic types like TABLE and VIEW"), - HIVE_SERVER2_SESSION_HOOK("hive.server2.session.hook", "", ""), - - // SSL settings - HIVE_SERVER2_USE_SSL("hive.server2.use.SSL", false, - "Set this to true for using SSL encryption in HiveServer2."), - HIVE_SERVER2_SSL_KEYSTORE_PATH("hive.server2.keystore.path", "", - "SSL certificate keystore location."), - HIVE_SERVER2_SSL_KEYSTORE_PASSWORD("hive.server2.keystore.password", "", - "SSL certificate keystore password."), - HIVE_SERVER2_MAP_FAIR_SCHEDULER_QUEUE("hive.server2.map.fair.scheduler.queue", true, - "If the YARN fair scheduler is configured and HiveServer2 is running in non-impersonation mode,\n" + - "this setting determines the user for fair scheduler queue mapping.\n" + - "If set to true (default), the logged-in user determines the fair scheduler queue\n" + - "for submitted jobs, so that map reduce resource usage can be tracked by user.\n" + - "If set to false, all Hive jobs go to the 'hive' user's queue."), - HIVE_SERVER2_BUILTIN_UDF_WHITELIST("hive.server2.builtin.udf.whitelist", "", - "Comma separated list of builtin udf names allowed in queries.\n" + - "An empty whitelist allows all builtin udfs to be executed. " + - " The udf black list takes precedence over udf white list"), - HIVE_SERVER2_BUILTIN_UDF_BLACKLIST("hive.server2.builtin.udf.blacklist", "", - "Comma separated list of udfs names. These udfs will not be allowed in queries." 
+ - " The udf black list takes precedence over udf white list"), - HIVE_ALLOW_UDF_LOAD_ON_DEMAND("hive.allow.udf.load.on.demand", false, - "Whether enable loading UDFs from metastore on demand; this is mostly relevant for\n" + - "HS2 and was the default behavior before Hive 1.2. Off by default."), - - HIVE_SERVER2_SESSION_CHECK_INTERVAL("hive.server2.session.check.interval", "6h", - new TimeValidator(TimeUnit.MILLISECONDS, 3000l, true, null, false), - "The check interval for session/operation timeout, which can be disabled by setting to zero or negative value."), - HIVE_SERVER2_CLOSE_SESSION_ON_DISCONNECT("hive.server2.close.session.on.disconnect", true, - "Session will be closed when connection is closed. Set this to false to have session outlive its parent connection."), - HIVE_SERVER2_IDLE_SESSION_TIMEOUT("hive.server2.idle.session.timeout", "7d", - new TimeValidator(TimeUnit.MILLISECONDS), - "Session will be closed when it's not accessed for this duration, which can be disabled by setting to zero or negative value."), - HIVE_SERVER2_IDLE_OPERATION_TIMEOUT("hive.server2.idle.operation.timeout", "5d", - new TimeValidator(TimeUnit.MILLISECONDS), - "Operation will be closed when it's not accessed for this duration of time, which can be disabled by setting to zero value.\n" + - " With positive value, it's checked for operations in terminal state only (FINISHED, CANCELED, CLOSED, ERROR).\n" + - " With negative value, it's checked for all of the operations regardless of state."), - HIVE_SERVER2_IDLE_SESSION_CHECK_OPERATION("hive.server2.idle.session.check.operation", true, - "Session will be considered to be idle only if there is no activity, and there is no pending operation.\n" + - " This setting takes effect only if session idle timeout (hive.server2.idle.session.timeout) and checking\n" + - "(hive.server2.session.check.interval) are enabled."), - HIVE_SERVER2_THRIFT_CLIENT_RETRY_LIMIT("hive.server2.thrift.client.retry.limit", 1,"Number of retries upon " + - "failure of Thrift HiveServer2 calls"), - HIVE_SERVER2_THRIFT_CLIENT_CONNECTION_RETRY_LIMIT("hive.server2.thrift.client.connect.retry.limit", 1,"Number of " + - "retries while opening a connection to HiveServe2"), - HIVE_SERVER2_THRIFT_CLIENT_RETRY_DELAY_SECONDS("hive.server2.thrift.client.retry.delay.seconds", "1s", - new TimeValidator(TimeUnit.SECONDS), "Number of seconds for the HiveServer2 thrift client to wait between " + - "consecutive connection attempts. 
Also specifies the time to wait between retrying thrift calls upon failures"), - HIVE_SERVER2_THRIFT_CLIENT_USER("hive.server2.thrift.client.user", "anonymous","Username to use against thrift" + - " client"), - HIVE_SERVER2_THRIFT_CLIENT_PASSWORD("hive.server2.thrift.client.password", "anonymous","Password to use against " + - "thrift client"), - - // ResultSet serialization settings - HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS("hive.server2.thrift.resultset.serialize.in.tasks", false, - "Whether we should serialize the Thrift structures used in JDBC ResultSet RPC in task nodes.\n " + - "We use SequenceFile and ThriftJDBCBinarySerDe to read and write the final results if this is true."), - // TODO: Make use of this config to configure fetch size - HIVE_SERVER2_THRIFT_RESULTSET_MAX_FETCH_SIZE("hive.server2.thrift.resultset.max.fetch.size", - 10000, "Max number of rows sent in one Fetch RPC call by the server to the client."), - HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE("hive.server2.thrift.resultset.default.fetch.size", 1000, - "The number of rows sent in one Fetch RPC call by the server to the client, if not\n" + - "specified by the client."), - HIVE_SERVER2_XSRF_FILTER_ENABLED("hive.server2.xsrf.filter.enabled",false, - "If enabled, HiveServer2 will block any requests made to it over http " + - "if an X-XSRF-HEADER header is not present"), - HIVE_SECURITY_COMMAND_WHITELIST("hive.security.command.whitelist", "set,reset,dfs,add,list,delete,reload,compile", - "Comma separated list of non-SQL Hive commands users are authorized to execute"), - HIVE_SERVER2_JOB_CREDENTIAL_PROVIDER_PATH("hive.server2.job.credential.provider.path", "", - "If set, this configuration property should provide a comma-separated list of URLs that indicates the type and " + - "location of providers to be used by hadoop credential provider API. It provides HiveServer2 the ability to provide job-specific " + - "credential providers for jobs run using MR and Spark execution engines. This functionality has not been tested against Tez."), - HIVE_MOVE_FILES_THREAD_COUNT("hive.mv.files.thread", 15, new SizeValidator(0L, true, 1024L, true), "Number of threads" - + " used to move files in move task. Set it to 0 to disable multi-threaded file moves. This parameter is also used by" - + " MSCK to check tables."), - HIVE_LOAD_DYNAMIC_PARTITIONS_THREAD_COUNT("hive.load.dynamic.partitions.thread", 15, - new SizeValidator(1L, true, 1024L, true), - "Number of threads used to load dynamic partitions."), - // If this is set all move tasks at the end of a multi-insert query will only begin once all - // outputs are ready - HIVE_MULTI_INSERT_MOVE_TASKS_SHARE_DEPENDENCIES( - "hive.multi.insert.move.tasks.share.dependencies", false, - "If this is set all move tasks for tables/partitions (not directories) at the end of a\n" + - "multi-insert query will only begin once the dependencies for all these move tasks have been\n" + - "met.\n" + - "Advantages: If concurrency is enabled, the locks will only be released once the query has\n" + - " finished, so with this config enabled, the time when the table/partition is\n" + - " generated will be much closer to when the lock on it is released.\n" + - "Disadvantages: If concurrency is not enabled, with this disabled, the tables/partitions which\n" + - " are produced by this query and finish earlier will be available for querying\n" + - " much earlier. 
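// Illustrative sketch, not part of this patch: the ResultSet fetch-size and XSRF knobs
// described above. Assumes the HiveConf setIntVar/setBoolVar setters; class name and
// sizes are examples.
import org.apache.hadoop.hive.conf.HiveConf;

public class FetchSizeConfigExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_DEFAULT_FETCH_SIZE, 5000);
    conf.setIntVar(HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_MAX_FETCH_SIZE, 50000);
    // Reject HTTP requests that lack the X-XSRF-HEADER header.
    conf.setBoolVar(HiveConf.ConfVars.HIVE_SERVER2_XSRF_FILTER_ENABLED, true);
  }
}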
Since the locks are only released once the query finishes, this\n" + - " does not apply if concurrency is enabled."), - - HIVE_INFER_BUCKET_SORT("hive.exec.infer.bucket.sort", false, - "If this is set, when writing partitions, the metadata will include the bucketing/sorting\n" + - "properties with which the data was written if any (this will not overwrite the metadata\n" + - "inherited from the table if the table is bucketed/sorted)"), - - HIVE_INFER_BUCKET_SORT_NUM_BUCKETS_POWER_TWO( - "hive.exec.infer.bucket.sort.num.buckets.power.two", false, - "If this is set, when setting the number of reducers for the map reduce task which writes the\n" + - "final output files, it will choose a number which is a power of two, unless the user specifies\n" + - "the number of reducers to use using mapred.reduce.tasks. The number of reducers\n" + - "may be set to a power of two, only to be followed by a merge task meaning preventing\n" + - "anything from being inferred.\n" + - "With hive.exec.infer.bucket.sort set to true:\n" + - "Advantages: If this is not set, the number of buckets for partitions will seem arbitrary,\n" + - " which means that the number of mappers used for optimized joins, for example, will\n" + - " be very low. With this set, since the number of buckets used for any partition is\n" + - " a power of two, the number of mappers used for optimized joins will be the least\n" + - " number of buckets used by any partition being joined.\n" + - "Disadvantages: This may mean a much larger or much smaller number of reducers being used in the\n" + - " final map reduce job, e.g. if a job was originally going to take 257 reducers,\n" + - " it will now take 512 reducers, similarly if the max number of reducers is 511,\n" + - " and a job was going to use this many, it will now use 256 reducers."), - - HIVEOPTLISTBUCKETING("hive.optimize.listbucketing", false, - "Enable list bucketing optimizer. Default value is false so that we disable it by default."), - - // Allow TCP Keep alive socket option for for HiveServer or a maximum timeout for the socket. - SERVER_READ_SOCKET_TIMEOUT("hive.server.read.socket.timeout", "10s", - new TimeValidator(TimeUnit.SECONDS), - "Timeout for the HiveServer to close the connection if no response from the client. By default, 10 seconds."), - SERVER_TCP_KEEP_ALIVE("hive.server.tcp.keepalive", true, - "Whether to enable TCP keepalive for the Hive Server. Keepalive will prevent accumulation of half-open connections."), - - HIVE_DECODE_PARTITION_NAME("hive.decode.partition.name", false, - "Whether to show the unquoted partition names in query results."), - - HIVE_EXECUTION_ENGINE("hive.execution.engine", "mr", new StringSet("mr", "tez", "spark"), - "Chooses execution engine. Options are: mr (Map reduce, default), tez, spark. While MR\n" + - "remains the default engine for historical reasons, it is itself a historical engine\n" + - "and is deprecated in Hive 2 line. 
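// Illustrative sketch, not part of this patch: selecting the execution engine discussed
// above. Assumes the HiveConf setVar/getVar setters; "tez" is just one of the allowed
// values (mr, tez, spark), and the class name is a placeholder.
import org.apache.hadoop.hive.conf.HiveConf;

public class ExecutionEngineConfigExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "tez");
    // The same property can also be flipped per session, e.g. SET hive.execution.engine=tez;
    System.out.println(conf.getVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE));
  }
}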
It may be removed without further warning."), - - HIVE_EXECUTION_MODE("hive.execution.mode", "container", new StringSet("container", "llap"), - "Chooses whether query fragments will run in container or in llap"), - - HIVE_JAR_DIRECTORY("hive.jar.directory", null, - "This is the location hive in tez mode will look for to find a site wide \n" + - "installed hive instance."), - HIVE_USER_INSTALL_DIR("hive.user.install.directory", "/user/", - "If hive (in tez mode only) cannot find a usable hive jar in \"hive.jar.directory\", \n" + - "it will upload the hive jar to \"hive.user.install.directory/user.name\"\n" + - "and use it to run queries."), - - // Vectorization enabled - HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", false, - "This flag should be set to true to enable vectorized mode of query execution.\n" + - "The default value is false."), - HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true, - "This flag should be set to true to enable vectorized mode of the reduce-side of query execution.\n" + - "The default value is true."), - HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED("hive.vectorized.execution.reduce.groupby.enabled", true, - "This flag should be set to true to enable vectorized mode of the reduce-side GROUP BY query execution.\n" + - "The default value is true."), - HIVE_VECTORIZATION_MAPJOIN_NATIVE_ENABLED("hive.vectorized.execution.mapjoin.native.enabled", true, - "This flag should be set to true to enable native (i.e. non-pass through) vectorization\n" + - "of queries using MapJoin.\n" + - "The default value is true."), - HIVE_VECTORIZATION_MAPJOIN_NATIVE_MULTIKEY_ONLY_ENABLED("hive.vectorized.execution.mapjoin.native.multikey.only.enabled", false, - "This flag should be set to true to restrict use of native vector map join hash tables to\n" + - "the MultiKey in queries using MapJoin.\n" + - "The default value is false."), - HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED("hive.vectorized.execution.mapjoin.minmax.enabled", false, - "This flag should be set to true to enable vector map join hash tables to\n" + - "use max / max filtering for integer join queries using MapJoin.\n" + - "The default value is false."), - HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD("hive.vectorized.execution.mapjoin.overflow.repeated.threshold", -1, - "The number of small table rows for a match in vector map join hash tables\n" + - "where we use the repeated field optimization in overflow vectorized row batch for join queries using MapJoin.\n" + - "A value of -1 means do use the join result optimization. Otherwise, threshold value can be 0 to maximum integer."), - HIVE_VECTORIZATION_MAPJOIN_NATIVE_FAST_HASHTABLE_ENABLED("hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled", false, - "This flag should be set to true to enable use of native fast vector map join hash tables in\n" + - "queries using MapJoin.\n" + - "The default value is false."), - HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000, - "Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed."), - HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000, - "Max number of entries in the vector group by aggregation hashtables. 
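// Illustrative sketch, not part of this patch: enabling the vectorized-execution options
// described above. Assumes the HiveConf boolean/int setters; class name and values are
// examples only.
import org.apache.hadoop.hive.conf.HiveConf;

public class VectorizationConfigExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
    conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED, true);
    // Re-estimate group-by hash entry sizes less often than the 100000-row default.
    conf.setIntVar(HiveConf.ConfVars.HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL, 500000);
    System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED));
  }
}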
\n" + - "Exceeding this will trigger a flush irrelevant of memory pressure condition."), - HIVE_VECTORIZATION_GROUPBY_FLUSH_PERCENT("hive.vectorized.groupby.flush.percent", (float) 0.1, - "Percent of entries in the group by aggregation hash flushed when the memory threshold is exceeded."), - HIVE_VECTORIZATION_REDUCESINK_NEW_ENABLED("hive.vectorized.execution.reducesink.new.enabled", true, - "This flag should be set to true to enable the new vectorization\n" + - "of queries using ReduceSink.\ni" + - "The default value is true."), - HIVE_VECTORIZATION_USE_VECTORIZED_INPUT_FILE_FORMAT("hive.vectorized.use.vectorized.input.format", true, - "This flag should be set to true to enable vectorizing with vectorized input file format capable SerDe.\n" + - "The default value is true."), - HIVE_VECTORIZATION_USE_VECTOR_DESERIALIZE("hive.vectorized.use.vector.serde.deserialize", true, - "This flag should be set to true to enable vectorizing rows using vector deserialize.\n" + - "The default value is true."), - HIVE_VECTORIZATION_USE_ROW_DESERIALIZE("hive.vectorized.use.row.serde.deserialize", false, - "This flag should be set to true to enable vectorizing using row deserialize.\n" + - "The default value is false."), - HIVE_VECTOR_ADAPTOR_USAGE_MODE("hive.vectorized.adaptor.usage.mode", "all", new StringSet("none", "chosen", "all"), - "Specifies the extent to which the VectorUDFAdaptor will be used for UDFs that do not have a cooresponding vectorized class.\n" + - "0. none : disable any usage of VectorUDFAdaptor\n" + - "1. chosen : use VectorUDFAdaptor for a small set of UDFs that were choosen for good performance\n" + - "2. all : use VectorUDFAdaptor for all UDFs" - ), - - HIVE_TYPE_CHECK_ON_INSERT("hive.typecheck.on.insert", true, "This property has been extended to control " - + "whether to check, convert, and normalize partition value to conform to its column type in " - + "partition operations including but not limited to insert, such as alter, describe etc."), - - HIVE_HADOOP_CLASSPATH("hive.hadoop.classpath", null, - "For Windows OS, we need to pass HIVE_HADOOP_CLASSPATH Java parameter while starting HiveServer2 \n" + - "using \"-hiveconf hive.hadoop.classpath=%HIVE_LIB%\"."), - - HIVE_RPC_QUERY_PLAN("hive.rpc.query.plan", false, - "Whether to send the query plan via local resource or RPC"), - HIVE_AM_SPLIT_GENERATION("hive.compute.splits.in.am", true, - "Whether to generate the splits locally or in the AM (tez only)"), - HIVE_TEZ_GENERATE_CONSISTENT_SPLITS("hive.tez.input.generate.consistent.splits", true, - "Whether to generate consistent split locations when generating splits in the AM"), - HIVE_PREWARM_ENABLED("hive.prewarm.enabled", false, "Enables container prewarm for Tez/Spark (Hadoop 2 only)"), - HIVE_PREWARM_NUM_CONTAINERS("hive.prewarm.numcontainers", 10, "Controls the number of containers to prewarm for Tez/Spark (Hadoop 2 only)"), - - HIVESTAGEIDREARRANGE("hive.stageid.rearrange", "none", new StringSet("none", "idonly", "traverse", "execution"), ""), - HIVEEXPLAINDEPENDENCYAPPENDTASKTYPES("hive.explain.dependency.append.tasktype", false, ""), - - HIVECOUNTERGROUP("hive.counters.group.name", "HIVE", - "The name of counter group for internal Hive variables (CREATED_FILE, FATAL_ERROR, etc.)"), - - HIVE_QUOTEDID_SUPPORT("hive.support.quoted.identifiers", "column", - new StringSet("none", "column"), - "Whether to use quoted identifier. 'none' or 'column' can be used. \n" + - " none: default(past) behavior. 
Implies only alphaNumeric and underscore are valid characters in identifiers.\n" + - " column: implies column names can contain any character." - ), - HIVE_SUPPORT_SPECICAL_CHARACTERS_IN_TABLE_NAMES("hive.support.special.characters.tablename", true, - "This flag should be set to true to enable support for special characters in table names.\n" - + "When it is set to false, only [a-zA-Z_0-9]+ are supported.\n" - + "The only supported special character right now is '/'. This flag applies only to quoted table names.\n" - + "The default value is true."), - // role names are case-insensitive - USERS_IN_ADMIN_ROLE("hive.users.in.admin.role", "", false, - "Comma separated list of users who are in admin role for bootstrapping.\n" + - "More users can be added in ADMIN role later."), - - HIVE_COMPAT("hive.compat", HiveCompat.DEFAULT_COMPAT_LEVEL, - "Enable (configurable) deprecated behaviors by setting desired level of backward compatibility.\n" + - "Setting to 0.12:\n" + - " Maintains division behavior: int / int = double"), - HIVE_CONVERT_JOIN_BUCKET_MAPJOIN_TEZ("hive.convert.join.bucket.mapjoin.tez", false, - "Whether joins can be automatically converted to bucket map joins in hive \n" + - "when tez is used as the execution engine."), - - HIVE_CHECK_CROSS_PRODUCT("hive.exec.check.crossproducts", true, - "Check if a plan contains a Cross Product. If there is one, output a warning to the Session's console."), - HIVE_LOCALIZE_RESOURCE_WAIT_INTERVAL("hive.localize.resource.wait.interval", "5000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Time to wait for another thread to localize the same resource for hive-tez."), - HIVE_LOCALIZE_RESOURCE_NUM_WAIT_ATTEMPTS("hive.localize.resource.num.wait.attempts", 5, - "The number of attempts waiting for localizing a resource in hive-tez."), - TEZ_AUTO_REDUCER_PARALLELISM("hive.tez.auto.reducer.parallelism", false, - "Turn on Tez' auto reducer parallelism feature. When enabled, Hive will still estimate data sizes\n" + - "and set parallelism estimates. Tez will sample source vertices' output sizes and adjust the estimates at runtime as\n" + - "necessary."), - TEZ_LLAP_MIN_REDUCER_PER_EXECUTOR("hive.tez.llap.min.reducer.per.executor", 0.95f, - "If above 0, the min number of reducers for auto-parallelism for LLAP scheduling will\n" + - "be set to this fraction of the number of executors."), - TEZ_MAX_PARTITION_FACTOR("hive.tez.max.partition.factor", 2f, - "When auto reducer parallelism is enabled this factor will be used to over-partition data in shuffle edges."), - TEZ_MIN_PARTITION_FACTOR("hive.tez.min.partition.factor", 0.25f, - "When auto reducer parallelism is enabled this factor will be used to put a lower limit to the number\n" + - "of reducers that tez specifies."), - TEZ_OPTIMIZE_BUCKET_PRUNING( - "hive.tez.bucket.pruning", false, - "When pruning is enabled, filters on bucket columns will be processed by \n" + - "filtering the splits against a bitset of included buckets. 
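// Illustrative sketch, not part of this patch: turning on Tez auto reducer parallelism
// and adjusting the partition factors described above. Assumes the HiveConf
// setBoolVar/setFloatVar setters; class name and factors are examples.
import org.apache.hadoop.hive.conf.HiveConf;

public class TezReducerParallelismExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.TEZ_AUTO_REDUCER_PARALLELISM, true);
    // Over-partition shuffle edges 3x, but never drop below a quarter of the estimate.
    conf.setFloatVar(HiveConf.ConfVars.TEZ_MAX_PARTITION_FACTOR, 3.0f);
    conf.setFloatVar(HiveConf.ConfVars.TEZ_MIN_PARTITION_FACTOR, 0.25f);
  }
}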
This needs predicates \n"+ - "produced by hive.optimize.ppd and hive.optimize.index.filters."), - TEZ_OPTIMIZE_BUCKET_PRUNING_COMPAT( - "hive.tez.bucket.pruning.compat", true, - "When pruning is enabled, handle possibly broken inserts due to negative hashcodes.\n" + - "This occasionally doubles the data scan cost, but is default enabled for safety"), - TEZ_DYNAMIC_PARTITION_PRUNING( - "hive.tez.dynamic.partition.pruning", true, - "When dynamic pruning is enabled, joins on partition keys will be processed by sending\n" + - "events from the processing vertices to the Tez application master. These events will be\n" + - "used to prune unnecessary partitions."), - TEZ_DYNAMIC_PARTITION_PRUNING_MAX_EVENT_SIZE("hive.tez.dynamic.partition.pruning.max.event.size", 1*1024*1024L, - "Maximum size of events sent by processors in dynamic pruning. If this size is crossed no pruning will take place."), - - TEZ_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE("hive.tez.dynamic.partition.pruning.max.data.size", 100*1024*1024L, - "Maximum total data size of events in dynamic pruning."), - TEZ_DYNAMIC_SEMIJOIN_REDUCTION("hive.tez.dynamic.semijoin.reduction", true, - "When dynamic semijoin is enabled, shuffle joins will perform a leaky semijoin before shuffle. This " + - "requires hive.tez.dynamic.partition.pruning to be enabled."), - TEZ_MIN_BLOOM_FILTER_ENTRIES("hive.tez.min.bloom.filter.entries", 1000000L, - "Bloom filter should be of at min certain size to be effective"), - TEZ_MAX_BLOOM_FILTER_ENTRIES("hive.tez.max.bloom.filter.entries", 100000000L, - "Bloom filter should be of at max certain size to be effective"), - TEZ_BLOOM_FILTER_FACTOR("hive.tez.bloom.filter.factor", (float) 2.0, - "Bloom filter should be a multiple of this factor with nDV"), - TEZ_BIGTABLE_MIN_SIZE_SEMIJOIN_REDUCTION("hive.tez.bigtable.minsize.semijoin.reduction", 100000000L, - "Big table for runtime filteting should be of atleast this size"), - TEZ_DYNAMIC_SEMIJOIN_REDUCTION_THRESHOLD("hive.tez.dynamic.semijoin.reduction.threshold", (float) 0.50, - "Only perform semijoin optimization if the estimated benefit at or above this fraction of the target table"), - TEZ_SMB_NUMBER_WAVES( - "hive.tez.smb.number.waves", - (float) 0.5, - "The number of waves in which to run the SMB join. Account for cluster being occupied. Ideally should be 1 wave."), - TEZ_EXEC_SUMMARY( - "hive.tez.exec.print.summary", - false, - "Display breakdown of execution steps, for every query executed by the shell."), - TEZ_EXEC_INPLACE_PROGRESS( - "hive.tez.exec.inplace.progress", - true, - "Updates tez job execution progress in-place in the terminal when hive-cli is used."), - HIVE_SERVER2_INPLACE_PROGRESS( - "hive.server2.in.place.progress", - true, - "Allows hive server 2 to send progress bar update information. 
This is currently available" - + " only if the execution engine is tez."), - SPARK_EXEC_INPLACE_PROGRESS("hive.spark.exec.inplace.progress", true, - "Updates spark job execution progress in-place in the terminal."), - TEZ_CONTAINER_MAX_JAVA_HEAP_FRACTION("hive.tez.container.max.java.heap.fraction", 0.8f, - "This is to override the tez setting with the same name"), - TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION_MIN("hive.tez.task.scale.memory.reserve-fraction.min", - 0.3f, "This is to override the tez setting tez.task.scale.memory.reserve-fraction"), - TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION_MAX("hive.tez.task.scale.memory.reserve.fraction.max", - 0.5f, "The maximum fraction of JVM memory which Tez will reserve for the processor"), - TEZ_TASK_SCALE_MEMORY_RESERVE_FRACTION("hive.tez.task.scale.memory.reserve.fraction", - -1f, "The customized fraction of JVM memory which Tez will reserve for the processor"), - // The default is different on the client and server, so it's null here. - LLAP_IO_ENABLED("hive.llap.io.enabled", null, "Whether the LLAP IO layer is enabled."), - LLAP_IO_NONVECTOR_WRAPPER_ENABLED("hive.llap.io.nonvector.wrapper.enabled", true, - "Whether the LLAP IO layer is enabled for non-vectorized queries that read inputs\n" + - "that can be vectorized"), - LLAP_IO_MEMORY_MODE("hive.llap.io.memory.mode", "cache", - new StringSet("cache", "none"), - "LLAP IO memory usage; 'cache' (the default) uses data and metadata cache with a\n" + - "custom off-heap allocator, 'none' doesn't use either (this mode may result in\n" + - "significant performance degradation)"), - LLAP_ALLOCATOR_MIN_ALLOC("hive.llap.io.allocator.alloc.min", "16Kb", new SizeValidator(), - "Minimum allocation possible from LLAP buddy allocator. Allocations below that are\n" + - "padded to minimum allocation. For ORC, should generally be the same as the expected\n" + - "compression buffer size, or next lowest power of 2. Must be a power of 2."), - LLAP_ALLOCATOR_MAX_ALLOC("hive.llap.io.allocator.alloc.max", "16Mb", new SizeValidator(), - "Maximum allocation possible from LLAP buddy allocator. For ORC, should be as large as\n" + - "the largest expected ORC compression buffer size. Must be a power of 2."), - @Deprecated - LLAP_IO_METADATA_FRACTION("hive.llap.io.metadata.fraction", 0.1f, - "Temporary setting for on-heap metadata cache fraction of xmx, set to avoid potential\n" + - "heap problems on very large datasets when on-heap metadata cache takes over\n" + - "everything. -1 managed metadata and data together (which is more flexible). This\n" + - "setting will be removed (in effect become -1) once ORC metadata cache is moved off-heap."), - LLAP_ALLOCATOR_ARENA_COUNT("hive.llap.io.allocator.arena.count", 8, - "Arena count for LLAP low-level cache; cache will be allocated in the steps of\n" + - "(size/arena_count) bytes. This size must be <= 1Gb and >= max allocation; if it is\n" + - "not the case, an adjusted size will be used. Using powers of 2 is recommended."), - LLAP_IO_MEMORY_MAX_SIZE("hive.llap.io.memory.size", "1Gb", new SizeValidator(), - "Maximum size for IO allocator or ORC low-level cache.", "hive.llap.io.cache.orc.size"), - LLAP_ALLOCATOR_DIRECT("hive.llap.io.allocator.direct", true, - "Whether ORC low-level cache should use direct allocation."), - LLAP_ALLOCATOR_MAPPED("hive.llap.io.allocator.mmap", false, - "Whether ORC low-level cache should use memory mapped allocation (direct I/O). 
\n" + - "This is recommended to be used along-side NVDIMM (DAX) or NVMe flash storage."), - LLAP_ALLOCATOR_MAPPED_PATH("hive.llap.io.allocator.mmap.path", "/tmp", - new WritableDirectoryValidator(), - "The directory location for mapping NVDIMM/NVMe flash storage into the ORC low-level cache."), - LLAP_ALLOCATOR_DISCARD_METHOD("hive.llap.io.allocator.discard.method", "both", - new StringSet("freelist", "brute", "both"), - "Which method to use to force-evict blocks to deal with fragmentation:\n" + - "freelist - use half-size free list (discards less, but also less reliable); brute -\n" + - "brute force, discard whatever we can; both - first try free list, then brute force."), - LLAP_ALLOCATOR_DEFRAG_HEADROOM("hive.llap.io.allocator.defrag.headroom", "1Mb", - "How much of a headroom to leave to allow allocator more flexibility to defragment.\n" + - "The allocator would further cap it to a fraction of total memory."), - LLAP_USE_LRFU("hive.llap.io.use.lrfu", true, - "Whether ORC low-level cache should use LRFU cache policy instead of default (FIFO)."), - LLAP_LRFU_LAMBDA("hive.llap.io.lrfu.lambda", 0.01f, - "Lambda for ORC low-level cache LRFU cache policy. Must be in [0, 1]. 0 makes LRFU\n" + - "behave like LFU, 1 makes it behave like LRU, values in between balance accordingly."), - LLAP_CACHE_ALLOW_SYNTHETIC_FILEID("hive.llap.cache.allow.synthetic.fileid", false, - "Whether LLAP cache should use synthetic file ID if real one is not available. Systems\n" + - "like HDFS, Isilon, etc. provide a unique file/inode ID. On other FSes (e.g. local\n" + - "FS), the cache would not work by default because LLAP is unable to uniquely track the\n" + - "files; enabling this setting allows LLAP to generate file ID from the path, size and\n" + - "modification time, which is almost certain to identify file uniquely. However, if you\n" + - "use a FS without file IDs and rewrite files a lot (or are paranoid), you might want\n" + - "to avoid this setting."), - LLAP_CACHE_ENABLE_ORC_GAP_CACHE("hive.llap.orc.gap.cache", true, - "Whether LLAP cache for ORC should remember gaps in ORC compression buffer read\n" + - "estimates, to avoid re-reading the data that was read once and discarded because it\n" + - "is unneeded. This is only necessary for ORC files written before HIVE-9660."), - LLAP_IO_USE_FILEID_PATH("hive.llap.io.use.fileid.path", true, - "Whether LLAP should use fileId (inode)-based path to ensure better consistency for the\n" + - "cases of file overwrites. This is supported on HDFS."), - // Restricted to text for now as this is a new feature; only text files can be sliced. - LLAP_IO_ENCODE_ENABLED("hive.llap.io.encode.enabled", true, - "Whether LLAP should try to re-encode and cache data for non-ORC formats. This is used\n" + - "on LLAP Server side to determine if the infrastructure for that is initialized."), - LLAP_IO_ENCODE_FORMATS("hive.llap.io.encode.formats", - "org.apache.hadoop.mapred.TextInputFormat,", - "The table input formats for which LLAP IO should re-encode and cache data.\n" + - "Comma-separated list."), - LLAP_IO_ENCODE_ALLOC_SIZE("hive.llap.io.encode.alloc.size", "256Kb", new SizeValidator(), - "Allocation size for the buffers used to cache encoded data from non-ORC files. 
Must\n" + - "be a power of two between " + LLAP_ALLOCATOR_MIN_ALLOC + " and\n" + - LLAP_ALLOCATOR_MAX_ALLOC + "."), - LLAP_IO_ENCODE_VECTOR_SERDE_ENABLED("hive.llap.io.encode.vector.serde.enabled", true, - "Whether LLAP should use vectorized SerDe reader to read text data when re-encoding."), - LLAP_IO_ENCODE_VECTOR_SERDE_ASYNC_ENABLED("hive.llap.io.encode.vector.serde.async.enabled", - true, - "Whether LLAP should use async mode in vectorized SerDe reader to read text data."), - LLAP_IO_ENCODE_SLICE_ROW_COUNT("hive.llap.io.encode.slice.row.count", 100000, - "Row count to use to separate cache slices when reading encoded data from row-based\n" + - "inputs into LLAP cache, if this feature is enabled."), - LLAP_IO_ENCODE_SLICE_LRR("hive.llap.io.encode.slice.lrr", true, - "Whether to separate cache slices when reading encoded data from text inputs via MR\n" + - "MR LineRecordRedader into LLAP cache, if this feature is enabled. Safety flag."), - LLAP_ORC_ENABLE_TIME_COUNTERS("hive.llap.io.orc.time.counters", true, - "Whether to enable time counters for LLAP IO layer (time spent in HDFS, etc.)"), - LLAP_AUTO_ALLOW_UBER("hive.llap.auto.allow.uber", false, - "Whether or not to allow the planner to run vertices in the AM."), - LLAP_AUTO_ENFORCE_TREE("hive.llap.auto.enforce.tree", true, - "Enforce that all parents are in llap, before considering vertex"), - LLAP_AUTO_ENFORCE_VECTORIZED("hive.llap.auto.enforce.vectorized", true, - "Enforce that inputs are vectorized, before considering vertex"), - LLAP_AUTO_ENFORCE_STATS("hive.llap.auto.enforce.stats", true, - "Enforce that col stats are available, before considering vertex"), - LLAP_AUTO_MAX_INPUT("hive.llap.auto.max.input.size", 10*1024*1024*1024L, - "Check input size, before considering vertex (-1 disables check)"), - LLAP_AUTO_MAX_OUTPUT("hive.llap.auto.max.output.size", 1*1024*1024*1024L, - "Check output size, before considering vertex (-1 disables check)"), - LLAP_SKIP_COMPILE_UDF_CHECK("hive.llap.skip.compile.udf.check", false, - "Whether to skip the compile-time check for non-built-in UDFs when deciding whether to\n" + - "execute tasks in LLAP. 
Skipping the check allows executing UDFs from pre-localized\n" + - "jars in LLAP; if the jars are not pre-localized, the UDFs will simply fail to load."), - LLAP_ALLOW_PERMANENT_FNS("hive.llap.allow.permanent.fns", true, - "Whether LLAP decider should allow permanent UDFs."), - LLAP_EXECUTION_MODE("hive.llap.execution.mode", "none", - new StringSet("auto", "none", "all", "map", "only"), - "Chooses whether query fragments will run in container or in llap"), - LLAP_OBJECT_CACHE_ENABLED("hive.llap.object.cache.enabled", true, - "Cache objects (plans, hashtables, etc) in llap"), - LLAP_IO_DECODING_METRICS_PERCENTILE_INTERVALS("hive.llap.io.decoding.metrics.percentiles.intervals", "30", - "Comma-delimited set of integers denoting the desired rollover intervals (in seconds)\n" + - "for percentile latency metrics on the LLAP daemon IO decoding time.\n" + - "hive.llap.queue.metrics.percentiles.intervals"), - LLAP_IO_THREADPOOL_SIZE("hive.llap.io.threadpool.size", 10, - "Specify the number of threads to use for low-level IO thread pool."), - LLAP_KERBEROS_PRINCIPAL(HIVE_LLAP_DAEMON_SERVICE_PRINCIPAL_NAME, "", - "The name of the LLAP daemon's service principal."), - LLAP_KERBEROS_KEYTAB_FILE("hive.llap.daemon.keytab.file", "", - "The path to the Kerberos Keytab file containing the LLAP daemon's service principal."), - LLAP_ZKSM_KERBEROS_PRINCIPAL("hive.llap.zk.sm.principal", "", - "The name of the principal to use to talk to ZooKeeper for ZooKeeper SecretManager."), - LLAP_ZKSM_KERBEROS_KEYTAB_FILE("hive.llap.zk.sm.keytab.file", "", - "The path to the Kerberos Keytab file containing the principal to use to talk to\n" + - "ZooKeeper for ZooKeeper SecretManager."), - LLAP_WEBUI_SPNEGO_KEYTAB_FILE("hive.llap.webui.spnego.keytab", "", - "The path to the Kerberos Keytab file containing the LLAP WebUI SPNEGO principal.\n" + - "Typical value would look like /etc/security/keytabs/spnego.service.keytab."), - LLAP_WEBUI_SPNEGO_PRINCIPAL("hive.llap.webui.spnego.principal", "", - "The LLAP WebUI SPNEGO service principal. Configured similarly to\n" + - "hive.server2.webui.spnego.principal"), - LLAP_FS_KERBEROS_PRINCIPAL("hive.llap.task.principal", "", - "The name of the principal to use to run tasks. By default, the clients are required\n" + - "to provide tokens to access HDFS/etc."), - LLAP_FS_KERBEROS_KEYTAB_FILE("hive.llap.task.keytab.file", "", - "The path to the Kerberos Keytab file containing the principal to use to run tasks.\n" + - "By default, the clients are required to provide tokens to access HDFS/etc."), - LLAP_ZKSM_ZK_CONNECTION_STRING("hive.llap.zk.sm.connectionString", "", - "ZooKeeper connection string for ZooKeeper SecretManager."), - LLAP_ZKSM_ZK_SESSION_TIMEOUT("hive.llap.zk.sm.session.timeout", "40s", new TimeValidator( - TimeUnit.MILLISECONDS), "ZooKeeper session timeout for ZK SecretManager."), - LLAP_ZK_REGISTRY_USER("hive.llap.zk.registry.user", "", - "In the LLAP ZooKeeper-based registry, specifies the username in the Zookeeper path.\n" + - "This should be the hive user or whichever user is running the LLAP daemon."), - LLAP_ZK_REGISTRY_NAMESPACE("hive.llap.zk.registry.namespace", null, - "In the LLAP ZooKeeper-based registry, overrides the ZK path namespace. Note that\n" + - "using this makes the path management (e.g. setting correct ACLs) your responsibility."), - // Note: do not rename to ..service.acl; Hadoop generates .hosts setting name from this, - // resulting in a collision with existing hive.llap.daemon.service.hosts and bizarre errors. 
- // These are read by Hadoop IPC, so you should check the usage and naming conventions (e.g. - // ".blocked" is a string hardcoded by Hadoop, and defaults are enforced elsewhere in Hive) - // before making changes or copy-pasting these. - LLAP_SECURITY_ACL("hive.llap.daemon.acl", "*", "The ACL for LLAP daemon."), - LLAP_SECURITY_ACL_DENY("hive.llap.daemon.acl.blocked", "", "The deny ACL for LLAP daemon."), - LLAP_MANAGEMENT_ACL("hive.llap.management.acl", "*", "The ACL for LLAP daemon management."), - LLAP_MANAGEMENT_ACL_DENY("hive.llap.management.acl.blocked", "", - "The deny ACL for LLAP daemon management."), - LLAP_REMOTE_TOKEN_REQUIRES_SIGNING("hive.llap.remote.token.requires.signing", "true", - new StringSet("false", "except_llap_owner", "true"), - "Whether the token returned from LLAP management API should require fragment signing.\n" + - "True by default; can be disabled to allow CLI to get tokens from LLAP in a secure\n" + - "cluster by setting it to true or 'except_llap_owner' (the latter returns such tokens\n" + - "to everyone except the user LLAP cluster is authenticating under)."), - - // Hadoop DelegationTokenManager default is 1 week. - LLAP_DELEGATION_TOKEN_LIFETIME("hive.llap.daemon.delegation.token.lifetime", "14d", - new TimeValidator(TimeUnit.SECONDS), - "LLAP delegation token lifetime, in seconds if specified without a unit."), - LLAP_MANAGEMENT_RPC_PORT("hive.llap.management.rpc.port", 15004, - "RPC port for LLAP daemon management service."), - LLAP_WEB_AUTO_AUTH("hive.llap.auto.auth", false, - "Whether or not to set Hadoop configs to enable auth in LLAP web app."), - - LLAP_DAEMON_RPC_NUM_HANDLERS("hive.llap.daemon.rpc.num.handlers", 5, - "Number of RPC handlers for LLAP daemon.", "llap.daemon.rpc.num.handlers"), - LLAP_DAEMON_WORK_DIRS("hive.llap.daemon.work.dirs", "", - "Working directories for the daemon. This should not be set if running as a YARN\n" + - "application via Slider. It must be set when not running via Slider on YARN. If the value\n" + - "is set when running as a Slider YARN application, the specified value will be used.", - "llap.daemon.work.dirs"), - LLAP_DAEMON_YARN_SHUFFLE_PORT("hive.llap.daemon.yarn.shuffle.port", 15551, - "YARN shuffle port for LLAP-daemon-hosted shuffle.", "llap.daemon.yarn.shuffle.port"), - LLAP_DAEMON_YARN_CONTAINER_MB("hive.llap.daemon.yarn.container.mb", -1, - "llap server yarn container size in MB. Used in LlapServiceDriver and package.py", "llap.daemon.yarn.container.mb"), - LLAP_DAEMON_QUEUE_NAME("hive.llap.daemon.queue.name", null, - "Queue name within which the llap slider application will run." + - " Used in LlapServiceDriver and package.py"), - // TODO Move the following 2 properties out of Configuration to a constant. - LLAP_DAEMON_CONTAINER_ID("hive.llap.daemon.container.id", null, - "ContainerId of a running LlapDaemon. Used to publish to the registry"), - LLAP_DAEMON_NM_ADDRESS("hive.llap.daemon.nm.address", null, - "NM Address host:rpcPort for the NodeManager on which the instance of the daemon is running.\n" + - "Published to the llap registry. Should never be set by users"), - LLAP_DAEMON_SHUFFLE_DIR_WATCHER_ENABLED("hive.llap.daemon.shuffle.dir.watcher.enabled", false, - "TODO doc", "llap.daemon.shuffle.dir-watcher.enabled"), - LLAP_DAEMON_AM_LIVENESS_HEARTBEAT_INTERVAL_MS( - "hive.llap.daemon.am.liveness.heartbeat.interval.ms", "10000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Tez AM-LLAP heartbeat interval (milliseconds). 
This needs to be below the task timeout\n" + - "interval, but otherwise as high as possible to avoid unnecessary traffic.", - "llap.daemon.am.liveness.heartbeat.interval-ms"), - LLAP_DAEMON_AM_LIVENESS_CONNECTION_TIMEOUT_MS( - "hive.llap.am.liveness.connection.timeout.ms", "10000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Amount of time to wait on connection failures to the AM from an LLAP daemon before\n" + - "considering the AM to be dead.", "llap.am.liveness.connection.timeout-millis"), - LLAP_DAEMON_AM_USE_FQDN("hive.llap.am.use.fqdn", false, - "Whether to use FQDN of the AM machine when submitting work to LLAP."), - // Not used yet - since the Writable RPC engine does not support this policy. - LLAP_DAEMON_AM_LIVENESS_CONNECTION_SLEEP_BETWEEN_RETRIES_MS( - "hive.llap.am.liveness.connection.sleep.between.retries.ms", "2000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Sleep duration while waiting to retry connection failures to the AM from the daemon for\n" + - "the general keep-alive thread (milliseconds).", - "llap.am.liveness.connection.sleep-between-retries-millis"), - LLAP_DAEMON_TASK_SCHEDULER_TIMEOUT_SECONDS( - "hive.llap.task.scheduler.timeout.seconds", "60s", - new TimeValidator(TimeUnit.SECONDS), - "Amount of time to wait before failing the query when there are no llap daemons running\n" + - "(alive) in the cluster.", "llap.daemon.scheduler.timeout.seconds"), - LLAP_DAEMON_NUM_EXECUTORS("hive.llap.daemon.num.executors", 4, - "Number of executors to use in LLAP daemon; essentially, the number of tasks that can be\n" + - "executed in parallel.", "llap.daemon.num.executors"), - LLAP_MAPJOIN_MEMORY_OVERSUBSCRIBE_FACTOR("hive.llap.mapjoin.memory.oversubscribe.factor", 0.2f, - "Fraction of memory from hive.auto.convert.join.noconditionaltask.size that can be over subscribed\n" + - "by queries running in LLAP mode. This factor has to be from 0.0 to 1.0. Default is 20% over subscription.\n"), - LLAP_MEMORY_OVERSUBSCRIPTION_MAX_EXECUTORS_PER_QUERY("hive.llap.memory.oversubscription.max.executors.per.query", 3, - "Used along with hive.llap.mapjoin.memory.oversubscribe.factor to limit the number of executors from\n" + - "which memory for mapjoin can be borrowed. Default 3 (from 3 other executors\n" + - "hive.llap.mapjoin.memory.oversubscribe.factor amount of memory can be borrowed based on which mapjoin\n" + - "conversion decision will be made). This is only an upper bound. Lower bound is determined by number of\n" + - "executors and configured max concurrency."), - LLAP_MAPJOIN_MEMORY_MONITOR_CHECK_INTERVAL("hive.llap.mapjoin.memory.monitor.check.interval", 100000L, - "Check memory usage of mapjoin hash tables after every interval of this many rows. If map join hash table\n" + - "memory usage exceeds (hive.auto.convert.join.noconditionaltask.size * hive.hash.table.inflation.factor)\n" + - "when running in LLAP, tasks will get killed and not retried. Set the value to 0 to disable this feature."), - LLAP_DAEMON_AM_REPORTER_MAX_THREADS("hive.llap.daemon.am-reporter.max.threads", 4, - "Maximum number of threads to be used for AM reporter. If this is lower than number of\n" + - "executors in llap daemon, it would be set to number of executors at runtime.", - "llap.daemon.am-reporter.max.threads"), - LLAP_DAEMON_RPC_PORT("hive.llap.daemon.rpc.port", 0, "The LLAP daemon RPC port.", - "llap.daemon.rpc.port. 
A value of 0 indicates a dynamic port"), - LLAP_DAEMON_MEMORY_PER_INSTANCE_MB("hive.llap.daemon.memory.per.instance.mb", 4096, - "The total amount of memory to use for the executors inside LLAP (in megabytes).", - "llap.daemon.memory.per.instance.mb"), - LLAP_DAEMON_XMX_HEADROOM("hive.llap.daemon.xmx.headroom", "5%", - "The total amount of heap memory set aside by LLAP and not used by the executors. Can\n" + - "be specified as size (e.g. '512Mb'), or percentage (e.g. '5%'). Note that the latter is\n" + - "derived from the total daemon XMX, which can be different from the total executor\n" + - "memory if the cache is on-heap; although that's not the default configuration."), - LLAP_DAEMON_VCPUS_PER_INSTANCE("hive.llap.daemon.vcpus.per.instance", 4, - "The total number of vcpus to use for the executors inside LLAP.", - "llap.daemon.vcpus.per.instance"), - LLAP_DAEMON_NUM_FILE_CLEANER_THREADS("hive.llap.daemon.num.file.cleaner.threads", 1, - "Number of file cleaner threads in LLAP.", "llap.daemon.num.file.cleaner.threads"), - LLAP_FILE_CLEANUP_DELAY_SECONDS("hive.llap.file.cleanup.delay.seconds", "300s", - new TimeValidator(TimeUnit.SECONDS), - "How long to delay before cleaning up query files in LLAP (in seconds, for debugging).", - "llap.file.cleanup.delay-seconds"), - LLAP_DAEMON_SERVICE_HOSTS("hive.llap.daemon.service.hosts", null, - "Explicitly specified hosts to use for LLAP scheduling. Useful for testing. By default,\n" + - "YARN registry is used.", "llap.daemon.service.hosts"), - LLAP_DAEMON_SERVICE_REFRESH_INTERVAL("hive.llap.daemon.service.refresh.interval.sec", "60s", - new TimeValidator(TimeUnit.SECONDS), - "LLAP YARN registry service list refresh delay, in seconds.", - "llap.daemon.service.refresh.interval"), - LLAP_DAEMON_COMMUNICATOR_NUM_THREADS("hive.llap.daemon.communicator.num.threads", 10, - "Number of threads to use in LLAP task communicator in Tez AM.", - "llap.daemon.communicator.num.threads"), - LLAP_DAEMON_DOWNLOAD_PERMANENT_FNS("hive.llap.daemon.download.permanent.fns", false, - "Whether LLAP daemon should localize the resources for permanent UDFs."), - LLAP_TASK_SCHEDULER_NODE_REENABLE_MIN_TIMEOUT_MS( - "hive.llap.task.scheduler.node.reenable.min.timeout.ms", "200ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Minimum time after which a previously disabled node will be re-enabled for scheduling,\n" + - "in milliseconds. This may be modified by an exponential back-off if failures persist.", - "llap.task.scheduler.node.re-enable.min.timeout.ms"), - LLAP_TASK_SCHEDULER_NODE_REENABLE_MAX_TIMEOUT_MS( - "hive.llap.task.scheduler.node.reenable.max.timeout.ms", "10000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Maximum time after which a previously disabled node will be re-enabled for scheduling,\n" + - "in milliseconds. This may be modified by an exponential back-off if failures persist.", - "llap.task.scheduler.node.re-enable.max.timeout.ms"), - LLAP_TASK_SCHEDULER_NODE_DISABLE_BACK_OFF_FACTOR( - "hive.llap.task.scheduler.node.disable.backoff.factor", 1.5f, - "Backoff factor on successive blacklists of a node due to some failures. Blacklist times\n" + - "start at the min timeout and go up to the max timeout based on this backoff factor.", - "llap.task.scheduler.node.disable.backoff.factor"), - LLAP_TASK_SCHEDULER_NUM_SCHEDULABLE_TASKS_PER_NODE( - "hive.llap.task.scheduler.num.schedulable.tasks.per.node", 0, - "The number of tasks the AM TaskScheduler will try allocating per node. 0 indicates that\n" + - "this should be picked up from the Registry. 
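// Illustrative sketch, not part of this patch: the LLAP daemon sizing settings described
// above, as they might be set for a small cluster. Assumes the HiveConf setters; class
// name and numbers are examples only.
import org.apache.hadoop.hive.conf.HiveConf;

public class LlapDaemonSizingExample {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setIntVar(HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS, 8);
    conf.setIntVar(HiveConf.ConfVars.LLAP_DAEMON_MEMORY_PER_INSTANCE_MB, 8192);
    conf.setIntVar(HiveConf.ConfVars.LLAP_DAEMON_VCPUS_PER_INSTANCE, 8);
    // Reserve heap headroom in the daemon Xmx that executors will not use.
    conf.setVar(HiveConf.ConfVars.LLAP_DAEMON_XMX_HEADROOM, "5%");
  }
}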
-1 indicates unlimited capacity; positive\n" + - "values indicate a specific bound.", "llap.task.scheduler.num.schedulable.tasks.per.node"), - LLAP_TASK_SCHEDULER_LOCALITY_DELAY( - "hive.llap.task.scheduler.locality.delay", "0ms", - new TimeValidator(TimeUnit.MILLISECONDS, -1l, true, Long.MAX_VALUE, true), - "Amount of time to wait before allocating a request which contains location information," + - " to a location other than the ones requested. Set to -1 for an infinite delay, 0" + - "for no delay." - ), - LLAP_DAEMON_TASK_PREEMPTION_METRICS_INTERVALS( - "hive.llap.daemon.task.preemption.metrics.intervals", "30,60,300", - "Comma-delimited set of integers denoting the desired rollover intervals (in seconds)\n" + - " for percentile latency metrics. Used by LLAP daemon task scheduler metrics for\n" + - " time taken to kill task (due to pre-emption) and useful time wasted by the task that\n" + - " is about to be preempted." - ), - LLAP_DAEMON_TASK_SCHEDULER_WAIT_QUEUE_SIZE("hive.llap.daemon.task.scheduler.wait.queue.size", - 10, "LLAP scheduler maximum queue size.", "llap.daemon.task.scheduler.wait.queue.size"), - LLAP_DAEMON_WAIT_QUEUE_COMPARATOR_CLASS_NAME( - "hive.llap.daemon.wait.queue.comparator.class.name", - "org.apache.hadoop.hive.llap.daemon.impl.comparator.ShortestJobFirstComparator", - "The priority comparator to use for LLAP scheduler prioroty queue. The built-in options\n" + - "are org.apache.hadoop.hive.llap.daemon.impl.comparator.ShortestJobFirstComparator and\n" + - ".....FirstInFirstOutComparator", "llap.daemon.wait.queue.comparator.class.name"), - LLAP_DAEMON_TASK_SCHEDULER_ENABLE_PREEMPTION( - "hive.llap.daemon.task.scheduler.enable.preemption", true, - "Whether non-finishable running tasks (e.g. a reducer waiting for inputs) should be\n" + - "preempted by finishable tasks inside LLAP scheduler.", - "llap.daemon.task.scheduler.enable.preemption"), - LLAP_TASK_COMMUNICATOR_CONNECTION_TIMEOUT_MS( - "hive.llap.task.communicator.connection.timeout.ms", "16000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Connection timeout (in milliseconds) before a failure to an LLAP daemon from Tez AM.", - "llap.task.communicator.connection.timeout-millis"), - LLAP_TASK_COMMUNICATOR_LISTENER_THREAD_COUNT( - "hive.llap.task.communicator.listener.thread-count", 30, - "The number of task communicator listener threads."), - LLAP_TASK_COMMUNICATOR_CONNECTION_SLEEP_BETWEEN_RETRIES_MS( - "hive.llap.task.communicator.connection.sleep.between.retries.ms", "2000ms", - new TimeValidator(TimeUnit.MILLISECONDS), - "Sleep duration (in milliseconds) to wait before retrying on error when obtaining a\n" + - "connection to LLAP daemon from Tez AM.", - "llap.task.communicator.connection.sleep-between-retries-millis"), - LLAP_DAEMON_WEB_PORT("hive.llap.daemon.web.port", 15002, "LLAP daemon web UI port.", - "llap.daemon.service.port"), - LLAP_DAEMON_WEB_SSL("hive.llap.daemon.web.ssl", false, - "Whether LLAP daemon web UI should use SSL.", "llap.daemon.service.ssl"), - LLAP_CLIENT_CONSISTENT_SPLITS("hive.llap.client.consistent.splits", false, - "Whether to setup split locations to match nodes on which llap daemons are running, " + - "instead of using the locations provided by the split itself. If there is no llap daemon " + - "running, fall back to locations provided by the split. This is effective only if " + - "hive.execution.mode is llap"), - LLAP_VALIDATE_ACLS("hive.llap.validate.acls", true, - "Whether LLAP should reject permissive ACLs in some cases (e.g. 
its own management\n" + - "protocol or ZK paths), similar to how ssh refuses a key with bad access permissions."), - LLAP_DAEMON_OUTPUT_SERVICE_PORT("hive.llap.daemon.output.service.port", 15003, - "LLAP daemon output service port"), - LLAP_DAEMON_OUTPUT_STREAM_TIMEOUT("hive.llap.daemon.output.stream.timeout", "120s", - new TimeValidator(TimeUnit.SECONDS), - "The timeout for the client to connect to LLAP output service and start the fragment\n" + - "output after sending the fragment. The fragment will fail if its output is not claimed."), - LLAP_DAEMON_OUTPUT_SERVICE_SEND_BUFFER_SIZE("hive.llap.daemon.output.service.send.buffer.size", - 128 * 1024, "Send buffer size to be used by LLAP daemon output service"), - LLAP_DAEMON_OUTPUT_SERVICE_MAX_PENDING_WRITES("hive.llap.daemon.output.service.max.pending.writes", - 8, "Maximum number of queued writes allowed per connection when sending data\n" + - " via the LLAP output service to external clients."), - LLAP_ENABLE_GRACE_JOIN_IN_LLAP("hive.llap.enable.grace.join.in.llap", false, - "Override if grace join should be allowed to run in llap."), - - LLAP_HS2_ENABLE_COORDINATOR("hive.llap.hs2.coordinator.enabled", true, - "Whether to create the LLAP coordinator; since execution engine and container vs llap\n" + - "settings are both coming from job configs, we don't know at start whether this should\n" + - "be created. Default true."), - LLAP_DAEMON_LOGGER("hive.llap.daemon.logger", Constants.LLAP_LOGGER_NAME_QUERY_ROUTING, - new StringSet(Constants.LLAP_LOGGER_NAME_QUERY_ROUTING, - Constants.LLAP_LOGGER_NAME_RFA, - Constants.LLAP_LOGGER_NAME_CONSOLE), - "logger used for llap-daemons."), - - SPARK_USE_OP_STATS("hive.spark.use.op.stats", true, - "Whether to use operator stats to determine reducer parallelism for Hive on Spark.\n" + - "If this is false, Hive will use source table stats to determine reducer\n" + - "parallelism for all first level reduce tasks, and the maximum reducer parallelism\n" + - "from all parents for all the rest (second level and onward) reducer tasks."), - SPARK_USE_TS_STATS_FOR_MAPJOIN("hive.spark.use.ts.stats.for.mapjoin", false, - "If this is set to true, mapjoin optimization in Hive/Spark will use statistics from\n" + - "TableScan operators at the root of operator tree, instead of parent ReduceSink\n" + - "operators of the Join operator."), - SPARK_CLIENT_FUTURE_TIMEOUT("hive.spark.client.future.timeout", - "60s", new TimeValidator(TimeUnit.SECONDS), - "Timeout for requests from Hive client to remote Spark driver."), - SPARK_JOB_MONITOR_TIMEOUT("hive.spark.job.monitor.timeout", - "60s", new TimeValidator(TimeUnit.SECONDS), - "Timeout for job monitor to get Spark job state."), - SPARK_RPC_CLIENT_CONNECT_TIMEOUT("hive.spark.client.connect.timeout", - "1000ms", new TimeValidator(TimeUnit.MILLISECONDS), - "Timeout for remote Spark driver in connecting back to Hive client."), - SPARK_RPC_CLIENT_HANDSHAKE_TIMEOUT("hive.spark.client.server.connect.timeout", - "90000ms", new TimeValidator(TimeUnit.MILLISECONDS), - "Timeout for handshake between Hive client and remote Spark driver. Checked by both processes."), - SPARK_RPC_SECRET_RANDOM_BITS("hive.spark.client.secret.bits", "256", - "Number of bits of randomness in the generated secret for communication between Hive client and remote Spark driver. 
" + - "Rounded down to the nearest multiple of 8."), - SPARK_RPC_MAX_THREADS("hive.spark.client.rpc.threads", 8, - "Maximum number of threads for remote Spark driver's RPC event loop."), - SPARK_RPC_MAX_MESSAGE_SIZE("hive.spark.client.rpc.max.size", 50 * 1024 * 1024, - "Maximum message size in bytes for communication between Hive client and remote Spark driver. Default is 50MB."), - SPARK_RPC_CHANNEL_LOG_LEVEL("hive.spark.client.channel.log.level", null, - "Channel logging level for remote Spark driver. One of {DEBUG, ERROR, INFO, TRACE, WARN}."), - SPARK_RPC_SASL_MECHANISM("hive.spark.client.rpc.sasl.mechanisms", "DIGEST-MD5", - "Name of the SASL mechanism to use for authentication."), - SPARK_RPC_SERVER_ADDRESS("hive.spark.client.rpc.server.address", "", - "The server address of HiverServer2 host to be used for communication between Hive client and remote Spark driver. " + - "Default is empty, which means the address will be determined in the same way as for hive.server2.thrift.bind.host." + - "This is only necessary if the host has mutiple network addresses and if a different network address other than " + - "hive.server2.thrift.bind.host is to be used."), - SPARK_RPC_SERVER_PORT("hive.spark.client.rpc.server.port", "", "A list of port ranges which can be used by RPC server " + - "with the format of 49152-49222,49228 and a random one is selected from the list. Default is empty, which randomly " + - "selects one port from all available ones."), - SPARK_DYNAMIC_PARTITION_PRUNING( - "hive.spark.dynamic.partition.pruning", false, - "When dynamic pruning is enabled, joins on partition keys will be processed by writing\n" + - "to a temporary HDFS file, and read later for removing unnecessary partitions."), - SPARK_DYNAMIC_PARTITION_PRUNING_MAX_DATA_SIZE( - "hive.spark.dynamic.partition.pruning.max.data.size", 100*1024*1024L, - "Maximum total data size in dynamic pruning."), - SPARK_USE_GROUPBY_SHUFFLE( - "hive.spark.use.groupby.shuffle", true, - "Spark groupByKey transformation has better performance but uses unbounded memory." + - "Turn this off when there is a memory issue."), - SPARK_JOB_MAX_TASKS("hive.spark.job.max.tasks", -1, "The maximum number of tasks a Spark job may have.\n" + - "If a Spark job contains more tasks than the maximum, it will be cancelled. A value of -1 means no limit."), - SPARK_STAGE_MAX_TASKS("hive.spark.stage.max.tasks", -1, "The maximum number of tasks a stage in a Spark job may have.\n" + - "If a Spark job stage contains more tasks than the maximum, the job will be cancelled. A value of -1 means no limit."), - NWAYJOINREORDER("hive.reorder.nway.joins", true, - "Runs reordering of tables within single n-way join (i.e.: picks streamtable)"), - HIVE_MERGE_NWAY_JOINS("hive.merge.nway.joins", true, - "Merge adjacent joins into a single n-way join"), - HIVE_LOG_N_RECORDS("hive.log.every.n.records", 0L, new RangeValidator(0L, null), - "If value is greater than 0 logs in fixed intervals of size n rather than exponentially."), - HIVE_MSCK_PATH_VALIDATION("hive.msck.path.validation", "throw", - new StringSet("throw", "skip", "ignore"), "The approach msck should take with HDFS " + - "directories that are partition-like but contain unsupported characters. 'throw' (an " + - "exception) is the default; 'skip' will skip the invalid directories and still repair the" + - " others; 'ignore' will skip the validation (legacy behavior, causes bugs in many cases)"), - HIVE_MSCK_REPAIR_BATCH_SIZE( - "hive.msck.repair.batch.size", 0, - "Batch size for the msck repair command. 
If the value is greater than zero,\n " - + "it will execute batch wise with the configured batch size. In case of errors while\n" - + "adding unknown partitions the batch size is automatically reduced by half in the subsequent\n" - + "retry attempt. The default value is zero which means it will execute directly (not batch wise)"), - HIVE_MSCK_REPAIR_BATCH_MAX_RETRIES("hive.msck.repair.batch.max.retries", 0, - "Maximum number of retries for the msck repair command when adding unknown partitions.\n " - + "If the value is greater than zero it will retry adding unknown partitions until the maximum\n" - + "number of attempts is reached or batch size is reduced to 0, whichever is earlier.\n" - + "In each retry attempt it will reduce the batch size by a factor of 2 until it reaches zero.\n" - + "If the value is set to zero it will retry until the batch size becomes zero as described above."), - HIVE_SERVER2_LLAP_CONCURRENT_QUERIES("hive.server2.llap.concurrent.queries", -1, - "The number of queries allowed in parallel via llap. Negative number implies 'infinite'."), - HIVE_TEZ_ENABLE_MEMORY_MANAGER("hive.tez.enable.memory.manager", true, - "Enable memory manager for tez"), - HIVE_HASH_TABLE_INFLATION_FACTOR("hive.hash.table.inflation.factor", (float) 2.0, - "Expected inflation factor between disk/in memory representation of hash tables"), - HIVE_LOG_TRACE_ID("hive.log.trace.id", "", - "Log tracing id that can be used by upstream clients for tracking respective logs. " + - "Truncated to " + LOG_PREFIX_LENGTH + " characters. Defaults to use auto-generated session id."), - - - HIVE_CONF_RESTRICTED_LIST("hive.conf.restricted.list", - "hive.security.authenticator.manager,hive.security.authorization.manager," + - "hive.security.metastore.authorization.manager,hive.security.metastore.authenticator.manager," + - "hive.users.in.admin.role,hive.server2.xsrf.filter.enabled,hive.security.authorization.enabled," + - "hive.distcp.privileged.doAs," + - "hive.server2.authentication.ldap.baseDN," + - "hive.server2.authentication.ldap.url," + - "hive.server2.authentication.ldap.Domain," + - "hive.server2.authentication.ldap.groupDNPattern," + - "hive.server2.authentication.ldap.groupFilter," + - "hive.server2.authentication.ldap.userDNPattern," + - "hive.server2.authentication.ldap.userFilter," + - "hive.server2.authentication.ldap.groupMembershipKey," + - "hive.server2.authentication.ldap.userMembershipKey," + - "hive.server2.authentication.ldap.groupClassKey," + - "hive.server2.authentication.ldap.customLDAPQuery," + - "hive.spark.client.connect.timeout," + - "hive.spark.client.server.connect.timeout," + - "hive.spark.client.channel.log.level," + - "hive.spark.client.rpc.max.size," + - "hive.spark.client.rpc.threads," + - "hive.spark.client.secret.bits," + - "hive.spark.client.rpc.server.address," + - "hive.spark.client.rpc.server.port", - "Comma separated list of configuration options which are immutable at runtime"), - HIVE_CONF_HIDDEN_LIST("hive.conf.hidden.list", - METASTOREPWD.varname + "," + HIVE_SERVER2_SSL_KEYSTORE_PASSWORD.varname - // Adding the S3 credentials from Hadoop config to be hidden - + ",fs.s3.awsAccessKeyId" - + ",fs.s3.awsSecretAccessKey" - + ",fs.s3n.awsAccessKeyId" - + ",fs.s3n.awsSecretAccessKey" - + ",fs.s3a.access.key" - + ",fs.s3a.secret.key" - + ",fs.s3a.proxy.password", - "Comma separated list of configuration options which should not be read by normal user like passwords"), - HIVE_CONF_INTERNAL_VARIABLE_LIST("hive.conf.internal.variable.list", - 
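The msck repair descriptions above spell out a retry strategy: with a positive batch size, a failed pass halves the batch size and retries until either the batch size or the retry budget reaches zero (a retry setting of zero meaning "keep halving until the batch size is gone"). A sketch of that loop under those stated semantics; addBatch() is a hypothetical stand-in for the metastore call:

  static void addPartitionsInBatches(java.util.List<String> parts, int batchSize, int maxRetries) {
    int attempt = 0;
    while (batchSize > 0 && (maxRetries == 0 || attempt <= maxRetries)) {
      try {
        for (int i = 0; i < parts.size(); i += batchSize) {
          addBatch(parts.subList(i, Math.min(i + batchSize, parts.size())));
        }
        return;                       // every unknown partition added
      } catch (Exception e) {
        batchSize /= 2;               // reduce the batch size by half for the next attempt
        attempt++;
      }
    }
  }

  static void addBatch(java.util.List<String> batch) throws Exception { /* metastore call goes here */ }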
"hive.added.files.path,hive.added.jars.path,hive.added.archives.path", - "Comma separated list of variables which are used internally and should not be configurable."), - - HIVE_QUERY_TIMEOUT_SECONDS("hive.query.timeout.seconds", "0s", - new TimeValidator(TimeUnit.SECONDS), - "Timeout for Running Query in seconds. A nonpositive value means infinite. " + - "If the query timeout is also set by thrift API call, the smaller one will be taken."), - - - HIVE_EXEC_INPUT_LISTING_MAX_THREADS("hive.exec.input.listing.max.threads", 0, new SizeValidator(0L, true, 1024L, true), - "Maximum number of threads that Hive uses to list file information from file systems (recommended > 1 for blobstore)."), - - /* BLOBSTORE section */ - - HIVE_BLOBSTORE_SUPPORTED_SCHEMES("hive.blobstore.supported.schemes", "s3,s3a,s3n", - "Comma-separated list of supported blobstore schemes."), - - HIVE_BLOBSTORE_USE_BLOBSTORE_AS_SCRATCHDIR("hive.blobstore.use.blobstore.as.scratchdir", false, - "Enable the use of scratch directories directly on blob storage systems (it may cause performance penalties)."), - - HIVE_BLOBSTORE_OPTIMIZATIONS_ENABLED("hive.blobstore.optimizations.enabled", true, - "This parameter enables a number of optimizations when running on blobstores:\n" + - "(1) If hive.blobstore.use.blobstore.as.scratchdir is false, force the last Hive job to write to the blobstore.\n" + - "This is a performance optimization that forces the final FileSinkOperator to write to the blobstore.\n" + - "See HIVE-15121 for details."); - - public final String varname; - public final String altName; - private final String defaultExpr; - - public final String defaultStrVal; - public final int defaultIntVal; - public final long defaultLongVal; - public final float defaultFloatVal; - public final boolean defaultBoolVal; - - private final Class valClass; - private final VarType valType; - - private final Validator validator; - - private final String description; - - private final boolean excluded; - private final boolean caseSensitive; - - ConfVars(String varname, Object defaultVal, String description) { - this(varname, defaultVal, null, description, true, false, null); - } - - ConfVars(String varname, Object defaultVal, String description, String altName) { - this(varname, defaultVal, null, description, true, false, altName); - } - - ConfVars(String varname, Object defaultVal, Validator validator, String description, - String altName) { - this(varname, defaultVal, validator, description, true, false, altName); - } - - ConfVars(String varname, Object defaultVal, String description, boolean excluded) { - this(varname, defaultVal, null, description, true, excluded, null); - } - - ConfVars(String varname, String defaultVal, boolean caseSensitive, String description) { - this(varname, defaultVal, null, description, caseSensitive, false, null); - } - - ConfVars(String varname, Object defaultVal, Validator validator, String description) { - this(varname, defaultVal, validator, description, true, false, null); - } - - ConfVars(String varname, Object defaultVal, Validator validator, String description, - boolean caseSensitive, boolean excluded, String altName) { - this.varname = varname; - this.validator = validator; - this.description = description; - this.defaultExpr = defaultVal == null ? 
null : String.valueOf(defaultVal); - this.excluded = excluded; - this.caseSensitive = caseSensitive; - this.altName = altName; - if (defaultVal == null || defaultVal instanceof String) { - this.valClass = String.class; - this.valType = VarType.STRING; - this.defaultStrVal = SystemVariables.substitute((String)defaultVal); - this.defaultIntVal = -1; - this.defaultLongVal = -1; - this.defaultFloatVal = -1; - this.defaultBoolVal = false; - } else if (defaultVal instanceof Integer) { - this.valClass = Integer.class; - this.valType = VarType.INT; - this.defaultStrVal = null; - this.defaultIntVal = (Integer)defaultVal; - this.defaultLongVal = -1; - this.defaultFloatVal = -1; - this.defaultBoolVal = false; - } else if (defaultVal instanceof Long) { - this.valClass = Long.class; - this.valType = VarType.LONG; - this.defaultStrVal = null; - this.defaultIntVal = -1; - this.defaultLongVal = (Long)defaultVal; - this.defaultFloatVal = -1; - this.defaultBoolVal = false; - } else if (defaultVal instanceof Float) { - this.valClass = Float.class; - this.valType = VarType.FLOAT; - this.defaultStrVal = null; - this.defaultIntVal = -1; - this.defaultLongVal = -1; - this.defaultFloatVal = (Float)defaultVal; - this.defaultBoolVal = false; - } else if (defaultVal instanceof Boolean) { - this.valClass = Boolean.class; - this.valType = VarType.BOOLEAN; - this.defaultStrVal = null; - this.defaultIntVal = -1; - this.defaultLongVal = -1; - this.defaultFloatVal = -1; - this.defaultBoolVal = (Boolean)defaultVal; - } else { - throw new IllegalArgumentException("Not supported type value " + defaultVal.getClass() + - " for name " + varname); - } - } - - public boolean isType(String value) { - return valType.isType(value); - } - - public Validator getValidator() { - return validator; - } - - public String validate(String value) { - return validator == null ? null : validator.validate(value); - } - - public String validatorDescription() { - return validator == null ? null : validator.toDescription(); - } - - public String typeString() { - String type = valType.typeString(); - if (valType == VarType.STRING && validator != null) { - if (validator instanceof TimeValidator) { - type += "(TIME)"; - } - } - return type; - } - - public String getRawDescription() { - return description; - } - - public String getDescription() { - String validator = validatorDescription(); - if (validator != null) { - return validator + ".\n" + description; - } - return description; - } - - public boolean isExcluded() { - return excluded; - } - - public boolean isCaseSensitive() { - return caseSensitive; - } - - @Override - public String toString() { - return varname; - } - - private static String findHadoopBinary() { - String val = findHadoopHome(); - // if can't find hadoop home we can at least try /usr/bin/hadoop - val = (val == null ? File.separator + "usr" : val) - + File.separator + "bin" + File.separator + "hadoop"; - // Launch hadoop command file on windows. - return val; - } - - private static String findYarnBinary() { - String val = findHadoopHome(); - val = (val == null ? 
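The constructor above dispatches on the runtime class of the default value, which is why defaults in the enum are written with an explicit type (4096 for an INT, 0L for a LONG, "300s" for a time-validated STRING, and so on). Condensed for readability, keeping only the type decision:

  // Illustrative condensation of the ConfVars constructor's type dispatch.
  static String varTypeOf(Object defaultVal) {
    if (defaultVal == null || defaultVal instanceof String) return "STRING";
    if (defaultVal instanceof Integer)                      return "INT";
    if (defaultVal instanceof Long)                         return "LONG";
    if (defaultVal instanceof Float)                        return "FLOAT";
    if (defaultVal instanceof Boolean)                      return "BOOLEAN";
    throw new IllegalArgumentException("Not supported type value " + defaultVal.getClass());
  }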
"yarn" : val + File.separator + "bin" + File.separator + "yarn"); - return val; - } - - private static String findHadoopHome() { - String val = System.getenv("HADOOP_HOME"); - // In Hadoop 1.X and Hadoop 2.X HADOOP_HOME is gone and replaced with HADOOP_PREFIX - if (val == null) { - val = System.getenv("HADOOP_PREFIX"); - } - return val; - } - - public String getDefaultValue() { - return valType.defaultValueString(this); - } - - public String getDefaultExpr() { - return defaultExpr; - } - - private Set getValidStringValues() { - if (validator == null || !(validator instanceof StringSet)) { - throw new RuntimeException(varname + " does not specify a list of valid values"); - } - return ((StringSet)validator).getExpected(); - } - - enum VarType { - STRING { - @Override - void checkType(String value) throws Exception { } - @Override - String defaultValueString(ConfVars confVar) { return confVar.defaultStrVal; } - }, - INT { - @Override - void checkType(String value) throws Exception { Integer.valueOf(value); } - }, - LONG { - @Override - void checkType(String value) throws Exception { Long.valueOf(value); } - }, - FLOAT { - @Override - void checkType(String value) throws Exception { Float.valueOf(value); } - }, - BOOLEAN { - @Override - void checkType(String value) throws Exception { Boolean.valueOf(value); } - }; - - boolean isType(String value) { - try { checkType(value); } catch (Exception e) { return false; } - return true; - } - String typeString() { return name().toUpperCase();} - String defaultValueString(ConfVars confVar) { return confVar.defaultExpr; } - abstract void checkType(String value) throws Exception; - } - } - - /** - * Writes the default ConfVars out to a byte array and returns an input - * stream wrapping that byte array. - * - * We need this in order to initialize the ConfVar properties - * in the underling Configuration object using the addResource(InputStream) - * method. - * - * It is important to use a LoopingByteArrayInputStream because it turns out - * addResource(InputStream) is broken since Configuration tries to read the - * entire contents of the same InputStream repeatedly without resetting it. - * LoopingByteArrayInputStream has special logic to handle this. - */ - private static synchronized InputStream getConfVarInputStream() { - if (confVarByteArray == null) { - try { - // Create a Hadoop configuration without inheriting default settings. - Configuration conf = new Configuration(false); - - applyDefaultNonNullConfVars(conf); - - ByteArrayOutputStream confVarBaos = new ByteArrayOutputStream(); - conf.writeXml(confVarBaos); - confVarByteArray = confVarBaos.toByteArray(); - } catch (Exception e) { - // We're pretty screwed if we can't load the default conf vars - throw new RuntimeException("Failed to initialize default Hive configuration variables!", e); - } - } - return new LoopingByteArrayInputStream(confVarByteArray); - } - - public void verifyAndSet(String name, String value) throws IllegalArgumentException { - if (modWhiteListPattern != null) { - Matcher wlMatcher = modWhiteListPattern.matcher(name); - if (!wlMatcher.matches()) { - throw new IllegalArgumentException("Cannot modify " + name + " at runtime. " - + "It is not in list of params that are allowed to be modified at runtime"); - } - } - if (restrictList.contains(name)) { - throw new IllegalArgumentException("Cannot modify " + name + " at runtime. It is in the list" - + " of parameters that can't be modified at runtime"); - } - String oldValue = name != null ? 
get(name) : null; - if (name == null || value == null || !value.equals(oldValue)) { - // When either name or value is null, the set method below will fail, - // and throw IllegalArgumentException - set(name, value); - if (isSparkRelatedConfig(name)) { - isSparkConfigUpdated = true; - } - } - } - - public boolean isHiddenConfig(String name) { - return hiddenSet.contains(name); - } - - /** - * check whether spark related property is updated, which includes spark configurations, - * RSC configurations and yarn configuration in Spark on YARN mode. - * @param name - * @return - */ - private boolean isSparkRelatedConfig(String name) { - boolean result = false; - if (name.startsWith("spark")) { // Spark property. - // for now we don't support changing spark app name on the fly - result = !name.equals("spark.app.name"); - } else if (name.startsWith("yarn")) { // YARN property in Spark on YARN mode. - String sparkMaster = get("spark.master"); - if (sparkMaster != null && sparkMaster.startsWith("yarn")) { - result = true; - } - } else if (name.startsWith("hive.spark")) { // Remote Spark Context property. - result = true; - } else if (name.equals("mapreduce.job.queuename")) { - // a special property starting with mapreduce that we would also like to effect if it changes - result = true; - } - - return result; - } - - public static int getIntVar(Configuration conf, ConfVars var) { - assert (var.valClass == Integer.class) : var.varname; - if (var.altName != null) { - return conf.getInt(var.varname, conf.getInt(var.altName, var.defaultIntVal)); - } - return conf.getInt(var.varname, var.defaultIntVal); - } - - public static void setIntVar(Configuration conf, ConfVars var, int val) { - assert (var.valClass == Integer.class) : var.varname; - conf.setInt(var.varname, val); - } - - public int getIntVar(ConfVars var) { - return getIntVar(this, var); - } - - public void setIntVar(ConfVars var, int val) { - setIntVar(this, var, val); - } - - public static long getTimeVar(Configuration conf, ConfVars var, TimeUnit outUnit) { - return toTime(getVar(conf, var), getDefaultTimeUnit(var), outUnit); - } - - public static void setTimeVar(Configuration conf, ConfVars var, long time, TimeUnit timeunit) { - assert (var.valClass == String.class) : var.varname; - conf.set(var.varname, time + stringFor(timeunit)); - } - - public long getTimeVar(ConfVars var, TimeUnit outUnit) { - return getTimeVar(this, var, outUnit); - } - - public void setTimeVar(ConfVars var, long time, TimeUnit outUnit) { - setTimeVar(this, var, time, outUnit); - } - - public static long getSizeVar(Configuration conf, ConfVars var) { - return toSizeBytes(getVar(conf, var)); - } - - public long getSizeVar(ConfVars var) { - return getSizeVar(this, var); - } - - private static TimeUnit getDefaultTimeUnit(ConfVars var) { - TimeUnit inputUnit = null; - if (var.validator instanceof TimeValidator) { - inputUnit = ((TimeValidator)var.validator).getTimeUnit(); - } - return inputUnit; - } - - public static long toTime(String value, TimeUnit inputUnit, TimeUnit outUnit) { - String[] parsed = parseNumberFollowedByUnit(value.trim()); - return outUnit.convert(Long.parseLong(parsed[0].trim()), unitFor(parsed[1].trim(), inputUnit)); - } - - public static long toSizeBytes(String value) { - String[] parsed = parseNumberFollowedByUnit(value.trim()); - return Long.parseLong(parsed[0].trim()) * multiplierFor(parsed[1].trim()); - } - - private static String[] parseNumberFollowedByUnit(String value) { - char[] chars = value.toCharArray(); - int i = 0; - for (; i < chars.length 
&& (chars[i] == '-' || Character.isDigit(chars[i])); i++) { - } - return new String[] {value.substring(0, i), value.substring(i)}; - } - - public static TimeUnit unitFor(String unit, TimeUnit defaultUnit) { - unit = unit.trim().toLowerCase(); - if (unit.isEmpty() || unit.equals("l")) { - if (defaultUnit == null) { - throw new IllegalArgumentException("Time unit is not specified"); - } - return defaultUnit; - } else if (unit.equals("d") || unit.startsWith("day")) { - return TimeUnit.DAYS; - } else if (unit.equals("h") || unit.startsWith("hour")) { - return TimeUnit.HOURS; - } else if (unit.equals("m") || unit.startsWith("min")) { - return TimeUnit.MINUTES; - } else if (unit.equals("s") || unit.startsWith("sec")) { - return TimeUnit.SECONDS; - } else if (unit.equals("ms") || unit.startsWith("msec")) { - return TimeUnit.MILLISECONDS; - } else if (unit.equals("us") || unit.startsWith("usec")) { - return TimeUnit.MICROSECONDS; - } else if (unit.equals("ns") || unit.startsWith("nsec")) { - return TimeUnit.NANOSECONDS; - } - throw new IllegalArgumentException("Invalid time unit " + unit); - } - - - public static long multiplierFor(String unit) { - unit = unit.trim().toLowerCase(); - if (unit.isEmpty() || unit.equals("b") || unit.equals("bytes")) { - return 1; - } else if (unit.equals("kb")) { - return 1024; - } else if (unit.equals("mb")) { - return 1024*1024; - } else if (unit.equals("gb")) { - return 1024*1024*1024; - } else if (unit.equals("tb")) { - return 1024*1024*1024*1024; - } else if (unit.equals("pb")) { - return 1024*1024*1024*1024*1024; - } - throw new IllegalArgumentException("Invalid size unit " + unit); - } - - public static String stringFor(TimeUnit timeunit) { - switch (timeunit) { - case DAYS: return "day"; - case HOURS: return "hour"; - case MINUTES: return "min"; - case SECONDS: return "sec"; - case MILLISECONDS: return "msec"; - case MICROSECONDS: return "usec"; - case NANOSECONDS: return "nsec"; - } - throw new IllegalArgumentException("Invalid timeunit " + timeunit); - } - - public static long getLongVar(Configuration conf, ConfVars var) { - assert (var.valClass == Long.class) : var.varname; - if (var.altName != null) { - return conf.getLong(var.varname, conf.getLong(var.altName, var.defaultLongVal)); - } - return conf.getLong(var.varname, var.defaultLongVal); - } - - public static long getLongVar(Configuration conf, ConfVars var, long defaultVal) { - if (var.altName != null) { - return conf.getLong(var.varname, conf.getLong(var.altName, defaultVal)); - } - return conf.getLong(var.varname, defaultVal); - } - - public static void setLongVar(Configuration conf, ConfVars var, long val) { - assert (var.valClass == Long.class) : var.varname; - conf.setLong(var.varname, val); - } - - public long getLongVar(ConfVars var) { - return getLongVar(this, var); - } - - public void setLongVar(ConfVars var, long val) { - setLongVar(this, var, val); - } - - public static float getFloatVar(Configuration conf, ConfVars var) { - assert (var.valClass == Float.class) : var.varname; - if (var.altName != null) { - return conf.getFloat(var.varname, conf.getFloat(var.altName, var.defaultFloatVal)); - } - return conf.getFloat(var.varname, var.defaultFloatVal); - } - - public static float getFloatVar(Configuration conf, ConfVars var, float defaultVal) { - if (var.altName != null) { - return conf.getFloat(var.varname, conf.getFloat(var.altName, defaultVal)); - } - return conf.getFloat(var.varname, defaultVal); - } - - public static void setFloatVar(Configuration conf, ConfVars var, float val) { - 
assert (var.valClass == Float.class) : var.varname; - conf.setFloat(var.varname, val); - } - - public float getFloatVar(ConfVars var) { - return getFloatVar(this, var); - } - - public void setFloatVar(ConfVars var, float val) { - setFloatVar(this, var, val); - } - - public static boolean getBoolVar(Configuration conf, ConfVars var) { - assert (var.valClass == Boolean.class) : var.varname; - if (var.altName != null) { - return conf.getBoolean(var.varname, conf.getBoolean(var.altName, var.defaultBoolVal)); - } - return conf.getBoolean(var.varname, var.defaultBoolVal); - } - - public static boolean getBoolVar(Configuration conf, ConfVars var, boolean defaultVal) { - if (var.altName != null) { - return conf.getBoolean(var.varname, conf.getBoolean(var.altName, defaultVal)); - } - return conf.getBoolean(var.varname, defaultVal); - } - - public static void setBoolVar(Configuration conf, ConfVars var, boolean val) { - assert (var.valClass == Boolean.class) : var.varname; - conf.setBoolean(var.varname, val); - } - - public boolean getBoolVar(ConfVars var) { - return getBoolVar(this, var); - } - - public void setBoolVar(ConfVars var, boolean val) { - setBoolVar(this, var, val); - } - - public static String getVar(Configuration conf, ConfVars var) { - assert (var.valClass == String.class) : var.varname; - return var.altName != null ? conf.get(var.varname, conf.get(var.altName, var.defaultStrVal)) - : conf.get(var.varname, var.defaultStrVal); - } - - public static String getVarWithoutType(Configuration conf, ConfVars var) { - return var.altName != null ? conf.get(var.varname, conf.get(var.altName, var.defaultExpr)) - : conf.get(var.varname, var.defaultExpr); - } - - public static String getTrimmedVar(Configuration conf, ConfVars var) { - assert (var.valClass == String.class) : var.varname; - if (var.altName != null) { - return conf.getTrimmed(var.varname, conf.getTrimmed(var.altName, var.defaultStrVal)); - } - return conf.getTrimmed(var.varname, var.defaultStrVal); - } - - public static String[] getTrimmedStringsVar(Configuration conf, ConfVars var) { - assert (var.valClass == String.class) : var.varname; - String[] result = conf.getTrimmedStrings(var.varname, (String[])null); - if (result != null) return result; - if (var.altName != null) { - result = conf.getTrimmedStrings(var.altName, (String[])null); - if (result != null) return result; - } - return org.apache.hadoop.util.StringUtils.getTrimmedStrings(var.defaultStrVal); - } - - public static String getVar(Configuration conf, ConfVars var, String defaultVal) { - String ret = var.altName != null ? 
conf.get(var.varname, conf.get(var.altName, defaultVal)) - : conf.get(var.varname, defaultVal); - return ret; - } - - public static String getVar(Configuration conf, ConfVars var, EncoderDecoder encoderDecoder) { - return encoderDecoder.decode(getVar(conf, var)); - } - - public String getLogIdVar(String defaultValue) { - String retval = getVar(ConfVars.HIVE_LOG_TRACE_ID); - if (retval.equals("")) { - l4j.info("Using the default value passed in for log id: " + defaultValue); - retval = defaultValue; - } - if (retval.length() > LOG_PREFIX_LENGTH) { - l4j.warn("The original log id prefix is " + retval + " has been truncated to " - + retval.substring(0, LOG_PREFIX_LENGTH - 1)); - retval = retval.substring(0, LOG_PREFIX_LENGTH - 1); - } - return retval; - } - - public static void setVar(Configuration conf, ConfVars var, String val) { - assert (var.valClass == String.class) : var.varname; - conf.set(var.varname, val); - } - public static void setVar(Configuration conf, ConfVars var, String val, - EncoderDecoder encoderDecoder) { - setVar(conf, var, encoderDecoder.encode(val)); - } - - public static ConfVars getConfVars(String name) { - return vars.get(name); - } - - public static ConfVars getMetaConf(String name) { - return metaConfs.get(name); - } - - public String getVar(ConfVars var) { - return getVar(this, var); - } - - public void setVar(ConfVars var, String val) { - setVar(this, var, val); - } - - public String getQueryString() { - return getQueryString(this); - } - - public static String getQueryString(Configuration conf) { - return getVar(conf, ConfVars.HIVEQUERYSTRING, EncoderDecoderFactory.URL_ENCODER_DECODER); - } - - public void setQueryString(String query) { - setQueryString(this, query); - } - - public static void setQueryString(Configuration conf, String query) { - setVar(conf, ConfVars.HIVEQUERYSTRING, query, EncoderDecoderFactory.URL_ENCODER_DECODER); - } - public void logVars(PrintStream ps) { - for (ConfVars one : ConfVars.values()) { - ps.println(one.varname + "=" + ((get(one.varname) != null) ? get(one.varname) : "")); - } - } - - public HiveConf() { - super(); - initialize(this.getClass()); - } - - public HiveConf(Class cls) { - super(); - initialize(cls); - } - - public HiveConf(Configuration other, Class cls) { - super(other); - initialize(cls); - } - - /** - * Copy constructor - */ - public HiveConf(HiveConf other) { - super(other); - hiveJar = other.hiveJar; - auxJars = other.auxJars; - isSparkConfigUpdated = other.isSparkConfigUpdated; - origProp = (Properties)other.origProp.clone(); - restrictList.addAll(other.restrictList); - hiddenSet.addAll(other.hiddenSet); - modWhiteListPattern = other.modWhiteListPattern; - } - - public Properties getAllProperties() { - return getProperties(this); - } - - public static Properties getProperties(Configuration conf) { - Iterator> iter = conf.iterator(); - Properties p = new Properties(); - while (iter.hasNext()) { - Map.Entry e = iter.next(); - p.setProperty(e.getKey(), e.getValue()); - } - return p; - } - - private void initialize(Class cls) { - hiveJar = (new JobConf(cls)).getJar(); - - // preserve the original configuration - origProp = getAllProperties(); - - // Overlay the ConfVars. 
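getQueryString/setQueryString above route the query text through EncoderDecoderFactory.URL_ENCODER_DECODER, whose implementation is not part of this hunk. A rough stand-in using the JDK's URL codec, shown only to illustrate the intended round trip:

  static String roundTrip(String sql) throws java.io.UnsupportedEncodingException {
    String stored = java.net.URLEncoder.encode(sql, "UTF-8");   // what setQueryString would place in the conf
    return java.net.URLDecoder.decode(stored, "UTF-8");         // what getQueryString would hand back
  }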
Note that this ignores ConfVars with null values - addResource(getConfVarInputStream()); - - // Overlay hive-site.xml if it exists - if (hiveSiteURL != null) { - addResource(hiveSiteURL); - } - - // if embedded metastore is to be used as per config so far - // then this is considered like the metastore server case - String msUri = this.getVar(HiveConf.ConfVars.METASTOREURIS); - if(HiveConfUtil.isEmbeddedMetaStore(msUri)){ - setLoadMetastoreConfig(true); - } - - // load hivemetastore-site.xml if this is metastore and file exists - if (isLoadMetastoreConfig() && hivemetastoreSiteUrl != null) { - addResource(hivemetastoreSiteUrl); - } - - // load hiveserver2-site.xml if this is hiveserver2 and file exists - // metastore can be embedded within hiveserver2, in such cases - // the conf params in hiveserver2-site.xml will override whats defined - // in hivemetastore-site.xml - if (isLoadHiveServer2Config() && hiveServer2SiteUrl != null) { - addResource(hiveServer2SiteUrl); - } - - // Overlay the values of any system properties whose names appear in the list of ConfVars - applySystemProperties(); - - if ((this.get("hive.metastore.ds.retry.attempts") != null) || - this.get("hive.metastore.ds.retry.interval") != null) { - l4j.warn("DEPRECATED: hive.metastore.ds.retry.* no longer has any effect. " + - "Use hive.hmshandler.retry.* instead"); - } - - // if the running class was loaded directly (through eclipse) rather than through a - // jar then this would be needed - if (hiveJar == null) { - hiveJar = this.get(ConfVars.HIVEJAR.varname); - } - - if (auxJars == null) { - auxJars = StringUtils.join(FileUtils.getJarFilesByPath(this.get(ConfVars.HIVEAUXJARS.varname), this), ','); - } - - if (getBoolVar(ConfVars.METASTORE_SCHEMA_VERIFICATION)) { - setBoolVar(ConfVars.METASTORE_AUTO_CREATE_ALL, false); - } - - if (getBoolVar(HiveConf.ConfVars.HIVECONFVALIDATION)) { - List trimmed = new ArrayList(); - for (Map.Entry entry : this) { - String key = entry.getKey(); - if (key == null || !key.startsWith("hive.")) { - continue; - } - ConfVars var = HiveConf.getConfVars(key); - if (var == null) { - var = HiveConf.getConfVars(key.trim()); - if (var != null) { - trimmed.add(key); - } - } - if (var == null) { - l4j.warn("HiveConf of name " + key + " does not exist"); - } else if (!var.isType(entry.getValue())) { - l4j.warn("HiveConf " + var.varname + " expects " + var.typeString() + " type value"); - } - } - for (String key : trimmed) { - set(key.trim(), getRaw(key)); - unset(key); - } - } - - setupSQLStdAuthWhiteList(); - - // setup list of conf vars that are not allowed to change runtime - setupRestrictList(); - hiddenSet.clear(); - hiddenSet.addAll(HiveConfUtil.getHiddenSet(this)); - } - - /** - * If the config whitelist param for sql standard authorization is not set, set it up here. 
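initialize() above layers its resources in a fixed order, and with Hadoop's Configuration a later addResource() overrides earlier values for the same key. Rough shape of that layering; the *Stream/*Url variables here are placeholders for the fields used above, not real identifiers:

  org.apache.hadoop.conf.Configuration c = new org.apache.hadoop.conf.Configuration(false);
  c.addResource(confVarDefaultsStream);   // built-in ConfVars defaults (non-null values only)
  c.addResource(hiveSiteUrl);             // hive-site.xml
  c.addResource(hiveMetastoreSiteUrl);    // hivemetastore-site.xml, when the embedded metastore applies
  c.addResource(hiveServer2SiteUrl);      // hiveserver2-site.xml, overriding hivemetastore-site.xml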
- */ - private void setupSQLStdAuthWhiteList() { - String whiteListParamsStr = getVar(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST); - if (whiteListParamsStr == null || whiteListParamsStr.trim().isEmpty()) { - // set the default configs in whitelist - whiteListParamsStr = getSQLStdAuthDefaultWhiteListPattern(); - } - setVar(ConfVars.HIVE_AUTHORIZATION_SQL_STD_AUTH_CONFIG_WHITELIST, whiteListParamsStr); - } - - private static String getSQLStdAuthDefaultWhiteListPattern() { - // create the default white list from list of safe config params - // and regex list - String confVarPatternStr = Joiner.on("|").join(convertVarsToRegex(sqlStdAuthSafeVarNames)); - String regexPatternStr = Joiner.on("|").join(sqlStdAuthSafeVarNameRegexes); - return regexPatternStr + "|" + confVarPatternStr; - } - - /** - * @param paramList list of parameter strings - * @return list of parameter strings with "." replaced by "\." - */ - private static String[] convertVarsToRegex(String[] paramList) { - String[] regexes = new String[paramList.length]; - for(int i=0; i systemProperties = getConfSystemProperties(); - for (Entry systemProperty : systemProperties.entrySet()) { - this.set(systemProperty.getKey(), systemProperty.getValue()); - } - } - - /** - * This method returns a mapping from config variable name to its value for all config variables - * which have been set using System properties - */ - public static Map getConfSystemProperties() { - Map systemProperties = new HashMap(); - - for (ConfVars oneVar : ConfVars.values()) { - if (System.getProperty(oneVar.varname) != null) { - if (System.getProperty(oneVar.varname).length() > 0) { - systemProperties.put(oneVar.varname, System.getProperty(oneVar.varname)); - } - } - } - - return systemProperties; - } - - /** - * Overlays ConfVar properties with non-null values - */ - private static void applyDefaultNonNullConfVars(Configuration conf) { - for (ConfVars var : ConfVars.values()) { - String defaultValue = var.getDefaultValue(); - if (defaultValue == null) { - // Don't override ConfVars with null values - continue; - } - conf.set(var.varname, defaultValue); - } - } - - public Properties getChangedProperties() { - Properties ret = new Properties(); - Properties newProp = getAllProperties(); - - for (Object one : newProp.keySet()) { - String oneProp = (String) one; - String oldValue = origProp.getProperty(oneProp); - if (!StringUtils.equals(oldValue, newProp.getProperty(oneProp))) { - ret.setProperty(oneProp, newProp.getProperty(oneProp)); - } - } - return (ret); - } - - public String getJar() { - return hiveJar; - } - - /** - * @return the auxJars - */ - public String getAuxJars() { - return auxJars; - } - - /** - * Set the auxiliary jars. Used for unit tests only. - * @param auxJars the auxJars to set. 
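getSQLStdAuthDefaultWhiteListPattern above builds one big alternation: literal parameter names get their dots escaped, then the escaped names and the raw regexes are OR-ed into a single pattern. A small self-contained illustration; the array contents are made up and are not the real safe-variable lists:

  String[] safeNames   = { "hive.exec.reducers.bytes.per.reducer", "hive.map.aggr" };  // illustrative subset
  String[] safeRegexes = { "hive\\.exec\\..*" };                                       // illustrative subset
  java.util.List<String> pieces = new java.util.ArrayList<>();
  for (String name : safeNames) {
    pieces.add(name.replace(".", "\\."));
  }
  pieces.addAll(java.util.Arrays.asList(safeRegexes));
  java.util.regex.Pattern whiteList = java.util.regex.Pattern.compile(String.join("|", pieces));
  boolean canSet = whiteList.matcher("hive.map.aggr").matches();                       // true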
- */ - public void setAuxJars(String auxJars) { - this.auxJars = auxJars; - setVar(this, ConfVars.HIVEAUXJARS, auxJars); - } - - public URL getHiveDefaultLocation() { - return hiveDefaultURL; - } - - public static void setHiveSiteLocation(URL location) { - hiveSiteURL = location; - } - - public static URL getHiveSiteLocation() { - return hiveSiteURL; - } - - public static URL getMetastoreSiteLocation() { - return hivemetastoreSiteUrl; - } - - public static URL getHiveServer2SiteLocation() { - return hiveServer2SiteUrl; - } - - /** - * @return the user name set in hadoop.job.ugi param or the current user from System - * @throws IOException - */ - public String getUser() throws IOException { - try { - UserGroupInformation ugi = Utils.getUGI(); - return ugi.getUserName(); - } catch (LoginException le) { - throw new IOException(le); - } - } - - public static String getColumnInternalName(int pos) { - return "_col" + pos; - } - - public static int getPositionFromInternalName(String internalName) { - Pattern internalPattern = Pattern.compile("_col([0-9]+)"); - Matcher m = internalPattern.matcher(internalName); - if (!m.matches()){ - return -1; - } else { - return Integer.parseInt(m.group(1)); - } - } - - /** - * Append comma separated list of config vars to the restrict List - * @param restrictListStr - */ - public void addToRestrictList(String restrictListStr) { - if (restrictListStr == null) { - return; - } - String oldList = this.getVar(ConfVars.HIVE_CONF_RESTRICTED_LIST); - if (oldList == null || oldList.isEmpty()) { - this.setVar(ConfVars.HIVE_CONF_RESTRICTED_LIST, restrictListStr); - } else { - this.setVar(ConfVars.HIVE_CONF_RESTRICTED_LIST, oldList + "," + restrictListStr); - } - setupRestrictList(); - } - - /** - * Set white list of parameters that are allowed to be modified - * - * @param paramNameRegex - */ - @LimitedPrivate(value = { "Currently only for use by HiveAuthorizer" }) - public void setModifiableWhiteListRegex(String paramNameRegex) { - if (paramNameRegex == null) { - return; - } - modWhiteListPattern = Pattern.compile(paramNameRegex); - } - - /** - * Add the HIVE_CONF_RESTRICTED_LIST values to restrictList, - * including HIVE_CONF_RESTRICTED_LIST itself - */ - private void setupRestrictList() { - String restrictListStr = this.getVar(ConfVars.HIVE_CONF_RESTRICTED_LIST); - restrictList.clear(); - if (restrictListStr != null) { - for (String entry : restrictListStr.split(",")) { - restrictList.add(entry.trim()); - } - } - - String internalVariableListStr = this.getVar(ConfVars.HIVE_CONF_INTERNAL_VARIABLE_LIST); - if (internalVariableListStr != null) { - for (String entry : internalVariableListStr.split(",")) { - restrictList.add(entry.trim()); - } - } - - restrictList.add(ConfVars.HIVE_IN_TEST.varname); - restrictList.add(ConfVars.HIVE_CONF_RESTRICTED_LIST.varname); - restrictList.add(ConfVars.HIVE_CONF_HIDDEN_LIST.varname); - restrictList.add(ConfVars.HIVE_CONF_INTERNAL_VARIABLE_LIST.varname); - } - - /** - * Strips hidden config entries from configuration - */ - public void stripHiddenConfigurations(Configuration conf) { - HiveConfUtil.stripConfigurations(conf, hiddenSet); - } - - /** - * @return true if HS2 webui is enabled - */ - public boolean isWebUiEnabled() { - return this.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_PORT) != 0; - } - - /** - * @return true if HS2 webui query-info cache is enabled - */ - public boolean isWebUiQueryInfoCacheEnabled() { - return isWebUiEnabled() && this.getIntVar(ConfVars.HIVE_SERVER2_WEBUI_MAX_HISTORIC_QUERIES) > 0; - } - - - public static 
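The internal column-name helpers above form a simple round trip:

  String internal = HiveConf.getColumnInternalName(3);              // "_col3"
  int pos         = HiveConf.getPositionFromInternalName(internal); // 3
  int notInternal = HiveConf.getPositionFromInternalName("key");    // -1, "_col([0-9]+)" does not match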
boolean isLoadMetastoreConfig() { - return loadMetastoreConfig; - } - - public static void setLoadMetastoreConfig(boolean loadMetastoreConfig) { - HiveConf.loadMetastoreConfig = loadMetastoreConfig; - } - - public static boolean isLoadHiveServer2Config() { - return loadHiveServer2Config; - } - - public static void setLoadHiveServer2Config(boolean loadHiveServer2Config) { - HiveConf.loadHiveServer2Config = loadHiveServer2Config; - } - - public static class StrictChecks { - - private static final String NO_LIMIT_MSG = makeMessage( - "Order by-s without limit", ConfVars.HIVE_STRICT_CHECKS_LARGE_QUERY); - private static final String NO_PARTITIONLESS_MSG = makeMessage( - "Queries against partitioned tables without a partition filter", - ConfVars.HIVE_STRICT_CHECKS_LARGE_QUERY); - private static final String NO_COMPARES_MSG = makeMessage( - "Unsafe compares between different types", ConfVars.HIVE_STRICT_CHECKS_TYPE_SAFETY); - private static final String NO_CARTESIAN_MSG = makeMessage( - "Cartesian products", ConfVars.HIVE_STRICT_CHECKS_CARTESIAN); - private static final String NO_BUCKETING_MSG = makeMessage( - "Load into bucketed tables", ConfVars.HIVE_STRICT_CHECKS_BUCKETING); - - private static String makeMessage(String what, ConfVars setting) { - return what + " are disabled for safety reasons. If you know what you are doing, please set" - + setting.varname + " to false and that " + ConfVars.HIVEMAPREDMODE.varname + " is not" - + " set to 'strict' to proceed. Note that if you may get errors or incorrect results if" - + " you make a mistake while using some of the unsafe features."; - } - - public static String checkNoLimit(Configuration conf) { - return isAllowed(conf, ConfVars.HIVE_STRICT_CHECKS_LARGE_QUERY) ? null : NO_LIMIT_MSG; - } - - public static String checkNoPartitionFilter(Configuration conf) { - return isAllowed(conf, ConfVars.HIVE_STRICT_CHECKS_LARGE_QUERY) - ? null : NO_PARTITIONLESS_MSG; - } - - public static String checkTypeSafety(Configuration conf) { - return isAllowed(conf, ConfVars.HIVE_STRICT_CHECKS_TYPE_SAFETY) ? null : NO_COMPARES_MSG; - } - - public static String checkCartesian(Configuration conf) { - return isAllowed(conf, ConfVars.HIVE_STRICT_CHECKS_CARTESIAN) ? null : NO_CARTESIAN_MSG; - } - - public static String checkBucketing(Configuration conf) { - return isAllowed(conf, ConfVars.HIVE_STRICT_CHECKS_BUCKETING) ? null : NO_BUCKETING_MSG; - } - - private static boolean isAllowed(Configuration conf, ConfVars setting) { - String mode = HiveConf.getVar(conf, ConfVars.HIVEMAPREDMODE, (String)null); - return (mode != null) ? !"strict".equals(mode) : !HiveConf.getBoolVar(conf, setting); - } - } - - public static String getNonMrEngines() { - String result = ""; - for (String s : ConfVars.HIVE_EXECUTION_ENGINE.getValidStringValues()) { - if ("mr".equals(s)) continue; - if (!result.isEmpty()) { - result += ", "; - } - result += s; - } - return result; - } - - public static String generateMrDeprecationWarning() { - return "Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. " - + "Consider using a different execution engine (i.e. " + HiveConf.getNonMrEngines() - + ") or using Hive 1.X releases."; - } - - private static final Object reverseMapLock = new Object(); - private static HashMap reverseMap = null; - - public static HashMap getOrCreateReverseMap() { - // This should be called rarely enough; for now it's ok to just lock every time. 
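The heart of StrictChecks above is isAllowed: an explicitly set hive.mapred.mode wins, otherwise the per-check hive.strict.checks.* flag decides, with a true flag meaning the construct is rejected. Condensed into a pure function over the two inputs:

  static boolean isAllowed(String mapredMode, boolean strictCheckFlag) {
    return (mapredMode != null) ? !"strict".equals(mapredMode) : !strictCheckFlag;
  }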
- synchronized (reverseMapLock) { - if (reverseMap != null) return reverseMap; - } - HashMap vars = new HashMap<>(); - for (ConfVars val : ConfVars.values()) { - vars.put(val.varname.toLowerCase(), val); - if (val.altName != null && !val.altName.isEmpty()) { - vars.put(val.altName.toLowerCase(), val); - } - } - synchronized (reverseMapLock) { - if (reverseMap != null) return reverseMap; - reverseMap = vars; - return reverseMap; - } - } -} diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java deleted file mode 100644 index 92f9a845e3..0000000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/NumDistinctValueEstimator.java +++ /dev/null @@ -1,367 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.metastore; -import java.util.Random; - -import javolution.util.FastBitSet; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.common.type.HiveDecimal; -import org.apache.hadoop.io.Text; - -/* - * https://en.wikipedia.org/wiki/Flajolet%E2%80%93Martin_algorithm - * We implement Flajolet–Martin algorithm in this class. - * The Flajolet–Martin algorithm is an algorithm for approximating the number of distinct elements - * in a stream with a single pass and space-consumption which is logarithmic in the maximum number - * of possible distinct elements in the stream. The algorithm was introduced by Philippe Flajolet - * and G. Nigel Martin in their 1984 paper "Probabilistic Counting Algorithms for Data Base Applications". - * Later it has been refined in the papers "LogLog counting of large cardinalities" by Marianne Durand - * and Philippe Flajolet, and "HyperLogLog: The analysis of a near-optimal cardinality estimation - * algorithm" by Philippe Flajolet et al. - */ - -/* - * The algorithm works like this. - * (1) Set the number of bit vectors, i.e., numBitVectors, based on the precision. - * (2) For each bit vector, generate hash value of the long value and mod it by 2^bitVectorSize-1. (addToEstimator) - * (3) Set the index (addToEstimator) - * (4) Take the average of the index for all the bit vectors and get the estimated NDV (estimateNumDistinctValues). - */ -public class NumDistinctValueEstimator { - - static final Log LOG = LogFactory.getLog(NumDistinctValueEstimator.class.getName()); - - /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number. - * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1. - * If a,b,x didn't come from a finite field ax1 + b mod k and ax2 + b mod k will not be pair wise - * independent. 
As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1 - * thus introducing errors in the estimates. - */ - private static final int BIT_VECTOR_SIZE = 31; - private final int numBitVectors; - - // Refer to Flajolet-Martin'86 for the value of phi - private static final double PHI = 0.77351; - - private final int[] a; - private final int[] b; - private final FastBitSet[] bitVector; - - private final Random aValue; - private final Random bValue; - - /* Create a new distinctValueEstimator - */ - public NumDistinctValueEstimator(int numBitVectors) { - this.numBitVectors = numBitVectors; - bitVector = new FastBitSet[numBitVectors]; - for (int i=0; i< numBitVectors; i++) { - bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE); - } - - a = new int[numBitVectors]; - b = new int[numBitVectors]; - - /* Use a large prime number as a seed to the random number generator. - * Java's random number generator uses the Linear Congruential Generator to generate random - * numbers using the following recurrence relation, - * - * X(n+1) = (a X(n) + c ) mod m - * - * where X0 is the seed. Java implementation uses m = 2^48. This is problematic because 2^48 - * is not a prime number and hence the set of numbers from 0 to m don't form a finite field. - * If these numbers don't come from a finite field any give X(n) and X(n+1) may not be pair - * wise independent. - * - * However, empirically passing in prime numbers as seeds seems to work better than when passing - * composite numbers as seeds. Ideally Java's Random should pick m such that m is prime. - * - */ - aValue = new Random(99397); - bValue = new Random(9876413); - - for (int i = 0; i < numBitVectors; i++) { - int randVal; - /* a and b shouldn't be even; If a and b are even, then none of the values - * will set bit 0 thus introducing errors in the estimate. Both a and b can be even - * 25% of the times and as a result 25% of the bit vectors could be inaccurate. To avoid this - * always pick odd values for a and b. 
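Read together with the four algorithm steps in the class comment and the hashing and bit-setting methods that follow, the estimator being removed here condenses to the sketch below. It is illustrative only: one Random instead of two, java.util.BitSet instead of javolution's FastBitSet, and the final estimate written as 2^avg / PHI using the PHI constant above as the usual Flajolet-Martin correction; it is not the deleted class verbatim.

  class TinyFmSketch {
    static final int BIT_VECTOR_SIZE = 31;
    static final double PHI = 0.77351;
    final java.util.BitSet[] bitVector;
    final int[] a, b;

    TinyFmSketch(int numBitVectors) {
      bitVector = new java.util.BitSet[numBitVectors];
      a = new int[numBitVectors];
      b = new int[numBitVectors];
      java.util.Random rand = new java.util.Random(99397);   // prime seed, as in the constructor above
      for (int i = 0; i < numBitVectors; i++) {
        bitVector[i] = new java.util.BitSet(BIT_VECTOR_SIZE);
        a[i] = rand.nextInt() | 1;                            // keep a and b odd, per the comment above
        b[i] = rand.nextInt() | 1;
      }
    }

    void add(long v) {                                        // steps (2) and (3)
      int mod = (1 << BIT_VECTOR_SIZE) - 1;                   // 2^31 - 1, a prime
      for (int i = 0; i < bitVector.length; i++) {
        int hash = (int) ((a[i] * v + b[i]) % mod);
        if (hash < 0) {
          hash += mod;                                        // map into 0 .. 2^31 - 2
        }
        int index = (hash == 0) ? BIT_VECTOR_SIZE : Integer.numberOfTrailingZeros(hash);
        bitVector[i].set(index);                              // position of the least significant 1-bit
      }
    }

    long estimate() {                                         // step (4)
      double sum = 0;
      for (java.util.BitSet bv : bitVector) {
        sum += bv.nextClearBit(0);                            // lowest unset bit per vector
      }
      return (long) (Math.pow(2.0, sum / bitVector.length) / PHI);
    }
  }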
- */ - do { - randVal = aValue.nextInt(); - } while (randVal % 2 == 0); - - a[i] = randVal; - - do { - randVal = bValue.nextInt(); - } while (randVal % 2 == 0); - - b[i] = randVal; - - if (a[i] < 0) { - a[i] = a[i] + (1 << BIT_VECTOR_SIZE - 1); - } - - if (b[i] < 0) { - b[i] = b[i] + (1 << BIT_VECTOR_SIZE - 1); - } - } - } - - public NumDistinctValueEstimator(String s, int numBitVectors) { - this.numBitVectors = numBitVectors; - FastBitSet bitVectorDeser[] = deserialize(s, numBitVectors); - bitVector = new FastBitSet[numBitVectors]; - for(int i=0; i = '0' && c <= '9') { - String t = new String(); - t = t + c; - c = s.charAt(i); - i = i + 1; - - while (c != ',' && c!= '}') { - t = t + c; - c = s.charAt(i); - i = i + 1; - } - - int bitIndex = Integer.parseInt(t); - assert(bitIndex >= 0); - assert(vectorIndex < numBitVectors); - b[vectorIndex].set(bitIndex); - if (c == '}') { - vectorIndex = vectorIndex + 1; - } - } - } - return b; - } - - private int generateHash(long v, int hashNum) { - int mod = (1<> 1; - } - - // Set bitvector[index] := 1 - bitVector[i].set(index); - } - } - - public void addToEstimatorPCSA(long v) { - int hash = generateHashForPCSA(v); - int rho = hash/numBitVectors; - int index; - - // Find the index of the least significant bit that is 1 - for (index=0; index> 1; - } - - // Set bitvector[index] := 1 - bitVector[hash%numBitVectors].set(index); - } - - public void addToEstimator(double d) { - int v = new Double(d).hashCode(); - addToEstimator(v); - } - - public void addToEstimatorPCSA(double d) { - int v = new Double(d).hashCode(); - addToEstimatorPCSA(v); - } - - public void addToEstimator(HiveDecimal decimal) { - int v = decimal.hashCode(); - addToEstimator(v); - } - - public void addToEstimatorPCSA(HiveDecimal decimal) { - int v = decimal.hashCode(); - addToEstimatorPCSA(v); - } - - public void mergeEstimators(NumDistinctValueEstimator o) { - // Bitwise OR the bitvector with the bitvector in the agg buffer - for (int i=0; i partNames, statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso .getStatsData().getSetField()); } - if (numBitVectors <= 0 || !cso.getStatsData().getDecimalStats().isSetBitVectors() + if (numBitVectors == 0 || !cso.getStatsData().getDecimalStats().isSetBitVectors() || cso.getStatsData().getDecimalStats().getBitVectors().length() == 0) { isNDVBitVectorSet = false; break; @@ -74,7 +75,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, double densityAvgSum = 0.0; NumDistinctValueEstimator ndvEstimator = null; if (isNDVBitVectorSet) { - ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + ndvEstimator = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(numBitVectors); } for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); @@ -86,8 +87,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, .getDoubleValue(newData.getLowValue())) / newData.getNumDVs(); } if (isNDVBitVectorSet) { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getNumBitVectors())); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -162,7 +164,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } else { // we first merge all the adjacent bitvectors that we could merge and // derive new partition names and 
index. - NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + NumDistinctValueEstimator ndvEstimator = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(numBitVectors); StringBuilder pseudoPartName = new StringBuilder(); double pseudoIndexSum = 0; int length = 0; @@ -216,8 +218,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getNumBitVectors())); } if (length > 0) { // we have to set ndv diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java index a88ef84e5c..0f449d384b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/DoubleColumnStatsAggregator.java @@ -26,7 +26,8 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -72,7 +73,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, double densityAvgSum = 0.0; NumDistinctValueEstimator ndvEstimator = null; if (isNDVBitVectorSet) { - ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + ndvEstimator = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(numBitVectors); } for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); @@ -83,8 +84,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); } if (isNDVBitVectorSet) { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getNumBitVectors())); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -148,7 +150,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } else { // we first merge all the adjacent bitvectors that we could merge and // derive new partition names and index. 
- NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + NumDistinctValueEstimator ndvEstimator = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(numBitVectors); StringBuilder pseudoPartName = new StringBuilder(); double pseudoIndexSum = 0; int length = 0; @@ -192,8 +194,9 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors(), + ndvEstimator.getNumBitVectors())); } if (length > 0) { // we have to set ndv diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java index 8ac6561aec..e746261851 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/LongColumnStatsAggregator.java @@ -26,7 +26,8 @@ import java.util.List; import java.util.Map; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -58,7 +59,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, cso .getStatsData().getSetField()); } - if (numBitVectors <= 0 || !cso.getStatsData().getLongStats().isSetBitVectors() + if (numBitVectors == 0 || !cso.getStatsData().getLongStats().isSetBitVectors() || cso.getStatsData().getLongStats().getBitVectors().length() == 0) { isNDVBitVectorSet = false; break; @@ -72,7 +73,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, double densityAvgSum = 0.0; NumDistinctValueEstimator ndvEstimator = null; if (isNDVBitVectorSet) { - ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + ndvEstimator = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(numBitVectors); } for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); @@ -83,8 +84,8 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, densityAvgSum += (newData.getHighValue() - newData.getLowValue()) / newData.getNumDVs(); } if (isNDVBitVectorSet) { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors(), numBitVectors)); } if (aggregateData == null) { aggregateData = newData.deepCopy(); @@ -148,7 +149,7 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, } else { // we first merge all the adjacent bitvectors that we could merge and // derive new partition names and index. 
- NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + NumDistinctValueEstimator ndvEstimator = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(numBitVectors); StringBuilder pseudoPartName = new StringBuilder(); double pseudoIndexSum = 0; int length = 0; @@ -192,8 +193,8 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, newData.getHighValue())); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); } - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(newData.getBitVectors(), numBitVectors)); } if (length > 0) { // we have to set ndv diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java index 2aa4046a46..6455784e9e 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/StringColumnStatsAggregator.java @@ -21,7 +21,8 @@ import java.util.List; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; @@ -62,12 +63,12 @@ public ColumnStatisticsObj aggregate(String colName, List partNames, ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); if (doAllPartitionContainStats && isNDVBitVectorSet) { StringColumnStatsData aggregateData = null; - NumDistinctValueEstimator ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + NumDistinctValueEstimator ndvEstimator = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(numBitVectors); for (ColumnStatistics cs : css) { ColumnStatisticsObj cso = cs.getStatsObjIterator().next(); StringColumnStatsData newData = cso.getStatsData().getStringStats(); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + newData.getBitVectors(), ndvEstimator.getNumBitVectors())); if (aggregateData == null) { aggregateData = newData.deepCopy(); } else { diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java index 33c7e3e52c..bf1f3489f5 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMerger.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git 
a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java index fe890e4e27..b2686a0705 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/ColumnStatsMergerFactory.java @@ -20,7 +20,8 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.metastore.api.BinaryColumnStatsData; import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; @@ -37,10 +38,17 @@ private ColumnStatsMergerFactory() { } - // we depend on the toString() method for javolution.util.FastCollection. private static int countNumBitVectors(String s) { if (s != null) { - return StringUtils.countMatches(s, "{"); + // first try to use HyperLogLog to parse it + try { + HyperLogLog.builder().build().deserialize(s, -1); + return -1; + } catch (Exception e) { + // this should be FMSketch + // we depend on the toString() method for javolution.util.FastCollection. + return StringUtils.countMatches(s, "{"); + } } else { return 0; } @@ -99,8 +107,9 @@ public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsOb default: throw new IllegalArgumentException("Unknown stats type " + typeNew.toString()); } - if (numBitVectors > 0) { - agg.ndvEstimator = new NumDistinctValueEstimator(numBitVectors); + if (numBitVectors != 0) { + agg.ndvEstimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(numBitVectors); } return agg; } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java index 3179b23438..d630210d98 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DateColumnStatsMerger.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; import org.apache.hadoop.hive.metastore.api.DateColumnStatsData; @@ -29,27 +29,25 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) { DateColumnStatsData aggregateData = aggregateColStats.getStatsData().getDateStats(); DateColumnStatsData newData = newColStats.getStatsData().getDateStats(); - Date lowValue = - aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData - .getLowValue() : newData.getLowValue(); + Date lowValue = aggregateData.getLowValue().compareTo(newData.getLowValue()) < 0 ? aggregateData + .getLowValue() : newData.getLowValue(); aggregateData.setLowValue(lowValue); - Date highValue = - aggregateData.getHighValue().compareTo(newData.getHighValue()) >= 0 ? 
aggregateData - .getHighValue() : newData.getHighValue(); + Date highValue = aggregateData.getHighValue().compareTo(newData.getHighValue()) >= 0 ? aggregateData + .getHighValue() : newData.getHighValue(); aggregateData.setHighValue(highValue); aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + aggregateData.getBitVectors(), ndvEstimator.getNumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + newData.getBitVectors(), ndvEstimator.getNumBitVectors())); long ndv = ndvEstimator.estimateNumDistinctValues(); LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + aggregateData.setBitVectors(ndvEstimator.serialize()); } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java index c13add9d9c..154f9a6ec3 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DecimalColumnStatsMerger.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Decimal; import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; @@ -41,15 +41,15 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + aggregateData.getBitVectors(), ndvEstimator.getNumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + newData.getBitVectors(), ndvEstimator.getNumBitVectors())); long ndv = ndvEstimator.estimateNumDistinctValues(); LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + 
aggregateData.setBitVectors(ndvEstimator.serialize()); } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java index fbdba24b0a..6b9dd5f8fb 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/DoubleColumnStatsMerger.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; @@ -34,15 +34,15 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + aggregateData.getBitVectors(), ndvEstimator.getNumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + newData.getBitVectors(), ndvEstimator.getNumBitVectors())); long ndv = ndvEstimator.estimateNumDistinctValues(); LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + aggregateData.setBitVectors(ndvEstimator.serialize()); } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java index ac65590505..bff36df26b 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/LongColumnStatsMerger.java @@ -19,7 +19,7 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; @@ -34,15 +34,15 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + 
ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + aggregateData.getBitVectors(), ndvEstimator.getNumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + newData.getBitVectors(), ndvEstimator.getNumBitVectors())); long ndv = ndvEstimator.estimateNumDistinctValues(); LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + aggregateData.setBitVectors(ndvEstimator.serialize()); } } } diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java index 41587477d3..1c3a2fd23f 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/stats/merge/StringColumnStatsMerger.java @@ -19,10 +19,9 @@ package org.apache.hadoop.hive.metastore.hbase.stats.merge; -import org.apache.hadoop.hive.metastore.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; -import org.apache.parquet.Log; public class StringColumnStatsMerger extends ColumnStatsMerger { @Override @@ -35,15 +34,15 @@ public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj new if (ndvEstimator == null || !newData.isSetBitVectors() || newData.getBitVectors().length() == 0) { aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); } else { - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(aggregateData.getBitVectors(), - ndvEstimator.getnumBitVectors())); - ndvEstimator.mergeEstimators(new NumDistinctValueEstimator(newData.getBitVectors(), - ndvEstimator.getnumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + aggregateData.getBitVectors(), ndvEstimator.getNumBitVectors())); + ndvEstimator.mergeEstimators(NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator( + newData.getBitVectors(), ndvEstimator.getNumBitVectors())); long ndv = ndvEstimator.estimateNumDistinctValues(); LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of " + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv); aggregateData.setNumDVs(ndv); - aggregateData.setBitVectors(ndvEstimator.serialize().toString()); + aggregateData.setBitVectors(ndvEstimator.serialize()); } } } diff --git a/pom.xml b/pom.xml index f9fae59a5d..2a71f782e0 100644 --- a/pom.xml +++ b/pom.xml @@ -206,6 +206,7 @@ 3.0.0 0.6.0 2.2.4 + 6.5.15 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java deleted file mode 100644 index a05906edfa..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DecimalNumDistinctValueEstimator.java +++ /dev/null @@ -1,42 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.hive.ql.udf.generic; - -import org.apache.hadoop.hive.common.type.HiveDecimal; - -public class DecimalNumDistinctValueEstimator extends NumDistinctValueEstimator { - - public DecimalNumDistinctValueEstimator(int numBitVectors) { - super(numBitVectors); - } - - public DecimalNumDistinctValueEstimator(String s, int numBitVectors) { - super(s, numBitVectors); - } - - public void addToEstimator(HiveDecimal decimal) { - int v = decimal.hashCode(); - super.addToEstimator(v); - } - - public void addToEstimatorPCSA(HiveDecimal decimal) { - int v = decimal.hashCode(); - super.addToEstimatorPCSA(v); - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DoubleNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DoubleNumDistinctValueEstimator.java deleted file mode 100644 index e76fc74dbc..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/DoubleNumDistinctValueEstimator.java +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.udf.generic; - -public class DoubleNumDistinctValueEstimator extends NumDistinctValueEstimator { - - public DoubleNumDistinctValueEstimator(int numBitVectors) { - super(numBitVectors); - } - - public DoubleNumDistinctValueEstimator(String s, int numVectors) { - super(s, numVectors); - } - - public void addToEstimator(double d) { - int v = new Double(d).hashCode(); - super.addToEstimator(v); - } - - public void addToEstimatorPCSA(double d) { - int v = new Double(d).hashCode(); - super.addToEstimatorPCSA(v); - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 2ebfcb2360..bfca31afd9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -22,6 +22,10 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.common.classification.InterfaceAudience; +import org.apache.hadoop.hive.common.ndv.FMSketch; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; @@ -53,7 +57,7 @@ public class GenericUDAFComputeStats extends AbstractGenericUDAFResolver { static final Logger LOG = LoggerFactory.getLogger(GenericUDAFComputeStats.class.getName()); - + @Override public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { @@ -436,12 +440,17 @@ public int estimate() { return (int) (model.lengthFor(columnType) + model.primitive1() + model.primitive2() - + ((numDV == null) ? NumDistinctValueEstimator.lengthFor(model, null) : + + ((numDV == null) ? 
lengthFor(model, null) : numDV.lengthFor(model))); } protected void initNDVEstimator(int numBitVectors) { - numDV = new NumDistinctValueEstimator(numBitVectors); + if (numBitVectors == -1) { + numDV = HyperLogLog.builder().build(); + } else { + numDV = new FMSketch(numBitVectors); + } + numDV.setNumBitVectors(numBitVectors); } protected abstract void update(Object p, PrimitiveObjectInspector inputOI); @@ -466,9 +475,8 @@ protected Object serializePartial(Object[] result) { serializeCommon(result); // Serialize numDistinctValue Estimator - Text t = numDV.serialize(); - ((Text) result[4]).set(t); - ((IntWritable) result[5]).set(numDV.getnumBitVectors()); + ((Text) result[4]).set(numDV.serialize()); + ((IntWritable) result[5]).set(numDV.getNumBitVectors()); return result; } @@ -540,9 +548,6 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { if (myagg.numDV == null) { Object partialValue = soi.getStructFieldData(partial, numBitVectorsField); int numVectors = numBitVectorsFieldOI.get(partialValue); - if (numVectors <= 0) { - return; - } myagg.initNDVEstimator(numVectors); } @@ -561,8 +566,12 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { // Merge numDistinctValue Estimators Object numDistinct = soi.getStructFieldData(partial, ndvField); String v = ndvFieldOI.getPrimitiveJavaObject(numDistinct); - NumDistinctValueEstimator o = - new NumDistinctValueEstimator(v, myagg.numDV.getnumBitVectors()); + NumDistinctValueEstimator o = null; + if (myagg.numDV.getNumBitVectors() == -1) { + o = HyperLogLog.builder().build().deserialize(v, myagg.numDV.getNumBitVectors()); + } else { + o = new FMSketch(v, myagg.numDV.getNumBitVectors()); + } myagg.numDV.mergeEstimators(o); } } @@ -847,7 +856,7 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc public long sumLength; /* Sum of lengths of all values seen so far */ public long count; /* Count of all values seen so far */ public long countNulls; /* Count of number of null values seen so far */ - public StringNumDistinctValueEstimator numDV; /* Distinct value estimator */ + public NumDistinctValueEstimator numDV; /* Distinct value estimator */ public int numBitVectors; public boolean firstItem; @Override @@ -855,7 +864,7 @@ public int estimate() { JavaDataModel model = JavaDataModel.get(); return (int) (model.primitive1() * 2 + model.primitive2() * 4 + model.lengthFor(columnType) + - ((numDV == null) ? NumDistinctValueEstimator.lengthFor(model, null) : + ((numDV == null) ? 
lengthFor(model, null) : numDV.lengthFor(model))); } @@ -869,7 +878,13 @@ public AggregationBuffer getNewAggregationBuffer() throws HiveException { } public void initNDVEstimator(StringStatsAgg aggBuffer, int numBitVectors) { - aggBuffer.numDV = new StringNumDistinctValueEstimator(numBitVectors); + if (numBitVectors == -1) { + aggBuffer.numDV = HyperLogLog.builder().build(); + + } else { + aggBuffer.numDV = new FMSketch(numBitVectors); + } + aggBuffer.numDV.setNumBitVectors(numBitVectors); aggBuffer.numDV.reset(); } @@ -955,8 +970,6 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep public Object terminatePartial(AggregationBuffer agg) throws HiveException { StringStatsAgg myagg = (StringStatsAgg) agg; - // Serialize numDistinctValue Estimator - Text t = myagg.numDV.serialize(); // Serialize the rest of the values in the AggBuffer ((Text) partialResult[0]).set(myagg.columnType); @@ -964,8 +977,9 @@ public Object terminatePartial(AggregationBuffer agg) throws HiveException { ((LongWritable) partialResult[2]).set(myagg.sumLength); ((LongWritable) partialResult[3]).set(myagg.count); ((LongWritable) partialResult[4]).set(myagg.countNulls); - ((Text) partialResult[5]).set(t); - ((IntWritable) partialResult[6]).set(myagg.numBitVectors); + // Serialize numDistinctValue Estimator + ((Text) partialResult[5]).set(myagg.numDV.serialize()); + ((IntWritable) partialResult[6]).set(myagg.numDV.getNumBitVectors()); return partialResult; } @@ -1007,7 +1021,12 @@ public void merge(AggregationBuffer agg, Object partial) throws HiveException { // Merge numDistinctValue Estimators partialValue = soi.getStructFieldData(partial, ndvField); String v = ndvFieldOI.getPrimitiveJavaObject(partialValue); - NumDistinctValueEstimator o = new NumDistinctValueEstimator(v, myagg.numBitVectors); + NumDistinctValueEstimator o = null; + if (myagg.numDV.getNumBitVectors() == -1) { + o = HyperLogLog.builder().build().deserialize(v, myagg.numDV.getNumBitVectors()); + } else { + o = new FMSketch(v, myagg.numDV.getNumBitVectors()); + } myagg.numDV.mergeEstimators(o); } } @@ -1425,4 +1444,25 @@ public void reset(AggregationBuffer agg) throws HiveException { ((NumericStatsAgg)agg).reset("Date"); } } + + @InterfaceAudience.LimitedPrivate(value = { "Hive" }) + static int lengthFor(JavaDataModel model, Integer numVector) { + int length = model.object(); + length += model.primitive1() * 2; // two int + length += model.primitive2(); // one double + length += model.lengthForRandom() * 2; // two Random + + if (numVector == null) { + numVector = 16; // HiveConf hive.stats.ndv.error default produces 16 + // vectors + } + + if (numVector > 0) { + length += model.array() * 3; // three array + length += model.primitive1() * numVector * 2; // two int array + length += (model.object() + model.array() + model.primitive1() + model.primitive2()) + * numVector; // bitset array + } + return length; + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java deleted file mode 100644 index 1c197a028a..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/LongNumDistinctValueEstimator.java +++ /dev/null @@ -1,43 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hive.ql.udf.generic; - -public class LongNumDistinctValueEstimator extends NumDistinctValueEstimator { - - public LongNumDistinctValueEstimator(int numBitVectors) { - super(numBitVectors); - } - - public LongNumDistinctValueEstimator(String s, int numVectors) { - super(s, numVectors); - } - - @Override - public void addToEstimator(long v) { - /* Update summary bitVector : - * Generate hash value of the long value and mod it by 2^bitVectorSize-1. - * In this implementation bitVectorSize is 31. - */ - super.addToEstimator(v); - } - - @Override - public void addToEstimatorPCSA(long v) { - super.addToEstimatorPCSA(v); - } -} diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java deleted file mode 100644 index 601901c163..0000000000 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/StringNumDistinctValueEstimator.java +++ /dev/null @@ -1,39 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hive.ql.udf.generic; - -public class StringNumDistinctValueEstimator extends NumDistinctValueEstimator { - - public StringNumDistinctValueEstimator(int numVectors) { - super(numVectors); - } - - public StringNumDistinctValueEstimator(String s, int numVectors) { - super(s, numVectors); - } - - public void addToEstimator(String s) { - int v = s.hashCode(); - super.addToEstimator(v); - } - - public void addToEstimatorPCSA(String s) { - int v = s.hashCode(); - super.addToEstimatorPCSA(v); - } -} diff --git a/ql/src/test/queries/clientpositive/hll.q b/ql/src/test/queries/clientpositive/hll.q new file mode 100644 index 0000000000..4d263f45c3 --- /dev/null +++ b/ql/src/test/queries/clientpositive/hll.q @@ -0,0 +1,47 @@ +set hive.mapred.mode=nonstrict; +set hive.stats.ndv.error=-1; + +create table i(key int); + +insert overwrite table i select key from src; + +explain analyze table i compute statistics for columns; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key double); + +insert overwrite table i select key from src; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key decimal); + +insert overwrite table i select key from src; + +analyze table i compute statistics for columns; + +desc formatted i key; + +drop table i; + +create table i(key date); + +insert into i values ('2012-08-17'); +insert into i values ('2012-08-17'); +insert into i values ('2013-08-17'); +insert into i values ('2012-03-17'); +insert into i values ('2012-05-17'); + +analyze table i compute statistics for columns; + +desc formatted i key; + diff --git a/ql/src/test/results/clientpositive/hll.q.out b/ql/src/test/results/clientpositive/hll.q.out new file mode 100644 index 0000000000..5d9b5618b9 --- /dev/null +++ b/ql/src/test/results/clientpositive/hll.q.out @@ -0,0 +1,239 @@ +PREHOOK: query: create table i(key int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: explain analyze table i compute statistics for columns +PREHOOK: type: QUERY +POSTHOOK: query: explain analyze table i compute statistics for columns +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-0 is a root stage + Stage-1 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-0 + Map Reduce + Map Operator Tree: + TableScan + alias: i + Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: key + Statistics: Num rows: 500 Data size: 1406 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: compute_stats(key, (- 1)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct) + Reduce 
Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-1 + Column Stats Work + Column Stats Desc: + Columns: key + Column Types: int + Table: default.i + +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key int 0 498 0 309 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key double 0.0 498.0 0 309 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key decimal) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key decimal) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert overwrite table i select key 
from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@i +POSTHOOK: query: insert overwrite table i select key from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key decimal(10,0) 0 498 0 309 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}} +PREHOOK: query: drop table i +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@i +PREHOOK: Output: default@i +POSTHOOK: query: drop table i +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@i +POSTHOOK: Output: default@i +PREHOOK: query: create table i(key date) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@i +POSTHOOK: query: create table i(key date) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@i +PREHOOK: query: insert into i values ('2012-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2013-08-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2013-08-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-03-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-03-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: insert into i values ('2012-05-17') +PREHOOK: type: QUERY +PREHOOK: Output: default@i +POSTHOOK: query: insert into i values ('2012-05-17') +POSTHOOK: type: QUERY +POSTHOOK: Output: default@i +POSTHOOK: Lineage: i.key EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +PREHOOK: query: analyze table i compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@i +#### A masked pattern was here #### +POSTHOOK: query: analyze table i compute statistics for columns +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@i +#### A masked pattern was here #### +PREHOOK: query: desc formatted i key +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@i +POSTHOOK: query: desc formatted i key +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@i +# col_name data_type min max num_nulls distinct_count avg_col_len max_col_len num_trues num_falses comment + +key date 2012-03-17 2013-08-17 0 4 from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
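
Note (not part of the patch): the merge path exercised by the aggregators and mergers above reduces to one pattern — obtain an estimator from NumDistinctValueEstimatorFactory (numBitVectors == -1 selects HyperLogLog, matching hive.stats.ndv.error < 0; a positive value selects FMSketch; 0 means no sketch), deserialize each partition's bit vectors with the same width, union them via mergeEstimators(), then read estimateNumDistinctValues() and persist serialize(). The sketch below is a minimal illustration of that flow under the assumption that the classes and signatures introduced by this patch are on the classpath; the NdvMergeSketch class, its package, and the serializedSketches input are hypothetical and exist only for this example.

```java
package org.apache.hadoop.hive.example; // hypothetical package, illustration only

import java.util.List;

import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;

public class NdvMergeSketch {

  /**
   * Merges serialized per-partition NDV sketches into a single estimate,
   * mirroring what the column stats aggregators/mergers in this patch do.
   *
   * @param numBitVectors      -1 selects HyperLogLog, a positive value selects
   *                           an FMSketch of that width, 0 means no sketch.
   * @param serializedSketches serialized bit vectors, one per partition
   *                           (e.g. the value of getBitVectors() on a stats object).
   */
  public static long mergeNdv(int numBitVectors, List<String> serializedSketches) {
    // Factory returns HyperLogLog for -1, FMSketch for > 0, and null for 0.
    NumDistinctValueEstimator merged =
        NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(numBitVectors);
    if (merged == null) {
      // No sketch available: callers fall back to max(numDVs), as the aggregators do.
      return 0L;
    }
    for (String s : serializedSketches) {
      // Deserialize each partition's sketch with the same width and union it in.
      merged.mergeEstimators(NumDistinctValueEstimatorFactory
          .getNumDistinctValueEstimator(s, merged.getNumBitVectors()));
    }
    return merged.estimateNumDistinctValues();
  }
}
```

A merger would then write the combined sketch back with setBitVectors(merged.serialize()) and the estimate with setNumDVs(...), exactly as the Date/Decimal/Double/Long/String mergers changed in this patch do.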