From 43a039bfcf9ca0cb7ad04ac568fb9cc95e11ee71 Mon Sep 17 00:00:00 2001 From: Guillem Nieto Date: Fri, 13 Sep 2019 11:51:54 +0200 Subject: [PATCH 1/2] IMPALA-8946: Fix histogram rendering to Prometheus Prometheus has two formats that allows to calculate quantiles (on client side): Histograms and Summaries. The former requires to pick a set of buckets, while the latter only requires to specifiy which quantiles (percentiles) the application wants to provide. The difference between summaries and histograms on Prometheus can be found here: https://prometheus.io/docs/practices/histograms/ Previous to this changes, Impala histograms were rendered as Prometheus histograms, but it was calculating percentiles instead of bucketing the recorded values, which is not what Prometheus clients expects. From now on, Impala histograms will be rendered as Prometheus summaries instead, which expects quantiles. --- be/src/util/histogram-metric.cc | 44 ++++++++---------- be/src/util/metrics-test.cc | 80 +++++++++++++++------------------ 2 files changed, 53 insertions(+), 71 deletions(-) diff --git a/be/src/util/histogram-metric.cc b/be/src/util/histogram-metric.cc index 012711dfc..09fade712 100644 --- a/be/src/util/histogram-metric.cc +++ b/be/src/util/histogram-metric.cc @@ -80,66 +80,58 @@ TMetricKind::type HistogramMetric::ToPrometheus( // check if unit its 'TIME_MS','TIME_US' or 'TIME_NS' and convert it to seconds, // this is because prometheus only supports time format in seconds if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.2\"} " + *value << name << "{quantile=\"0.2\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(25), unit_) << "\n"; } else { - *value << name << "{le=\"0.2\"} " << histogram_->ValueAtPercentile(25) << "\n"; + *value << name << "{quantile=\"0.2\"} " << histogram_->ValueAtPercentile(25) + << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.5\"} " + *value << name << "{quantile=\"0.5\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(50), unit_) << "\n"; } else { - *value << name << "{le=\"0.5\"} " << histogram_->ValueAtPercentile(50) << "\n"; + *value << name << "{quantile=\"0.5\"} " << histogram_->ValueAtPercentile(50) + << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.7\"} " + *value << name << "{quantile=\"0.7\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(75), unit_) << "\n"; } else { - *value << name << "{le=\"0.7\"} " << histogram_->ValueAtPercentile(75) << "\n"; + *value << name << "{quantile=\"0.7\"} " << histogram_->ValueAtPercentile(75) + << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.9\"} " + *value << name << "{quantile=\"0.9\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(90), unit_) << "\n"; } else { - *value << name << "{le=\"0.9\"} " << histogram_->ValueAtPercentile(90) << "\n"; + *value << name << "{quantile=\"0.9\"} " << histogram_->ValueAtPercentile(90) + << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.95\"} " + *value << name << "{quantile=\"0.95\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(95), unit_) << "\n"; } else { - *value << name << "{le=\"0.95\"} " << histogram_->ValueAtPercentile(95) << "\n"; - } - - if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.999\"} " - << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(99.9), unit_) + *value << name << "{quantile=\"0.95\"} " << histogram_->ValueAtPercentile(95) << "\n"; - } else { - *value << name << "{le=\"0.999\"} " << histogram_->ValueAtPercentile(99.9) << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "_max " << ConvertToPrometheusSecs(histogram_->MaxValue(), unit_) + *value << name << "{quantile=\"0.999\"} " + << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(99.9), unit_) << "\n"; } else { - *value << name << "_max " << histogram_->MaxValue() << "\n"; - } - - if (IsUnitTimeBased(unit_)) { - *value << name << "_min " << ConvertToPrometheusSecs(histogram_->MinValue(), unit_) + *value << name << "{quantile=\"0.999\"} " << histogram_->ValueAtPercentile(99.9) << "\n"; - } else { - *value << name << "_min " << histogram_->MinValue() << "\n"; } *value << name << "_count " << histogram_->TotalCount(); } - *metric_kind << "# TYPE " << name << " histogram"; + *metric_kind << "# TYPE " << name << " summary"; return TMetricKind::HISTOGRAM; } diff --git a/be/src/util/metrics-test.cc b/be/src/util/metrics-test.cc index 92fc08f02..8a25f0974 100644 --- a/be/src/util/metrics-test.cc +++ b/be/src/util/metrics-test.cc @@ -738,16 +738,14 @@ TEST_F(MetricsTest, HistogramPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2.5\n" - "impala_histogram_metric{le=\"0.5\"} 5\n" - "impala_histogram_metric{le=\"0.7\"} 7.5\n" - "impala_histogram_metric{le=\"0.9\"} 9\n" - "impala_histogram_metric{le=\"0.95\"} 9.496\n" - "impala_histogram_metric{le=\"0.999\"} 9.984\n" - "impala_histogram_metric_max 10.001\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2.5\n" + "impala_histogram_metric{quantile=\"0.5\"} 5\n" + "impala_histogram_metric{quantile=\"0.7\"} 7.5\n" + "impala_histogram_metric{quantile=\"0.9\"} 9\n" + "impala_histogram_metric{quantile=\"0.95\"} 9.496\n" + "impala_histogram_metric{quantile=\"0.999\"} 9.984\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, HistogramTimeNSPrometheus) { @@ -764,16 +762,14 @@ TEST_F(MetricsTest, HistogramTimeNSPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2.5e-06\n" - "impala_histogram_metric{le=\"0.5\"} 5e-06\n" - "impala_histogram_metric{le=\"0.7\"} 7.5e-06\n" - "impala_histogram_metric{le=\"0.9\"} 9e-06\n" - "impala_histogram_metric{le=\"0.95\"} 9.496e-06\n" - "impala_histogram_metric{le=\"0.999\"} 9.984e-06\n" - "impala_histogram_metric_max 1.0001e-05\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2.5e-06\n" + "impala_histogram_metric{quantile=\"0.5\"} 5e-06\n" + "impala_histogram_metric{quantile=\"0.7\"} 7.5e-06\n" + "impala_histogram_metric{quantile=\"0.9\"} 9e-06\n" + "impala_histogram_metric{quantile=\"0.95\"} 9.496e-06\n" + "impala_histogram_metric{quantile=\"0.999\"} 9.984e-06\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, HistogramTimeSPrometheus) { @@ -790,16 +786,14 @@ TEST_F(MetricsTest, HistogramTimeSPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2500\n" - "impala_histogram_metric{le=\"0.5\"} 5000\n" - "impala_histogram_metric{le=\"0.7\"} 7500\n" - "impala_histogram_metric{le=\"0.9\"} 9000\n" - "impala_histogram_metric{le=\"0.95\"} 9496\n" - "impala_histogram_metric{le=\"0.999\"} 9984\n" - "impala_histogram_metric_max 10001\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2500\n" + "impala_histogram_metric{quantile=\"0.5\"} 5000\n" + "impala_histogram_metric{quantile=\"0.7\"} 7500\n" + "impala_histogram_metric{quantile=\"0.9\"} 9000\n" + "impala_histogram_metric{quantile=\"0.95\"} 9496\n" + "impala_histogram_metric{quantile=\"0.999\"} 9984\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, HistogramBytesPrometheus) { @@ -816,16 +810,14 @@ TEST_F(MetricsTest, HistogramBytesPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2500\n" - "impala_histogram_metric{le=\"0.5\"} 5000\n" - "impala_histogram_metric{le=\"0.7\"} 7500\n" - "impala_histogram_metric{le=\"0.9\"} 9000\n" - "impala_histogram_metric{le=\"0.95\"} 9496\n" - "impala_histogram_metric{le=\"0.999\"} 9984\n" - "impala_histogram_metric_max 10001\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2500\n" + "impala_histogram_metric{quantile=\"0.5\"} 5000\n" + "impala_histogram_metric{quantile=\"0.7\"} 7500\n" + "impala_histogram_metric{quantile=\"0.9\"} 9000\n" + "impala_histogram_metric{quantile=\"0.95\"} 9496\n" + "impala_histogram_metric{quantile=\"0.999\"} 9984\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, HistogramUnitPrometheus) { @@ -842,16 +834,14 @@ TEST_F(MetricsTest, HistogramUnitPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2500\n" - "impala_histogram_metric{le=\"0.5\"} 5000\n" - "impala_histogram_metric{le=\"0.7\"} 7500\n" - "impala_histogram_metric{le=\"0.9\"} 9000\n" - "impala_histogram_metric{le=\"0.95\"} 9496\n" - "impala_histogram_metric{le=\"0.999\"} 9984\n" - "impala_histogram_metric_max 10001\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2500\n" + "impala_histogram_metric{quantile=\"0.5\"} 5000\n" + "impala_histogram_metric{quantile=\"0.7\"} 7500\n" + "impala_histogram_metric{quantile=\"0.9\"} 9000\n" + "impala_histogram_metric{quantile=\"0.95\"} 9496\n" + "impala_histogram_metric{quantile=\"0.999\"} 9984\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, MetricGroupPrometheus) { -- 2.20.1 From 9a48e29df9f562d2f191ab60635b94001fbc1dea Mon Sep 17 00:00:00 2001 From: Guillem Nieto Date: Mon, 30 Sep 2019 11:29:01 +0200 Subject: [PATCH 2/2] IMPALA-8946: Add sum serie to Prometheus summaries To be compliant with Prometheus summaries, a `_sum` serie needs to be added, containing the sum of all the registered values. The current `HdrHistogram` implementation on be::utils does not have the `TotalSum` method (and the required properties to track it). I copied the implementation from be::kudu::utils and added a couple of unit tests which covers the sum method. Also adapted the Prometheus tests to verify that sum serie is properly rendered. --- be/src/util/CMakeLists.txt | 2 ++ be/src/util/hdr-histogram-test.cc | 49 +++++++++++++++++++++++++++++++ be/src/util/hdr-histogram.cc | 4 +++ be/src/util/hdr-histogram.h | 4 +++ be/src/util/histogram-metric.cc | 8 ++++- be/src/util/metrics-test.cc | 15 ++++++---- 6 files changed, 76 insertions(+), 6 deletions(-) create mode 100644 be/src/util/hdr-histogram-test.cc diff --git a/be/src/util/CMakeLists.txt b/be/src/util/CMakeLists.txt index 6ad6059ea..e0bbda0ad 100644 --- a/be/src/util/CMakeLists.txt +++ b/be/src/util/CMakeLists.txt @@ -114,6 +114,7 @@ add_library(UtilTests STATIC filesystem-util-test.cc fixed-size-hash-table-test.cc hdfs-util-test.cc + hdr-histogram-test.cc logging-support-test.cc lru-cache-test.cc metrics-test.cc @@ -172,6 +173,7 @@ ADD_UNIFIED_BE_LSAN_TEST(error-util-test "ErrorMsg.*") ADD_UNIFIED_BE_LSAN_TEST(filesystem-util-test "FilesystemUtil.*") ADD_UNIFIED_BE_LSAN_TEST(fixed-size-hash-table-test "FixedSizeHash.*") ADD_UNIFIED_BE_LSAN_TEST(hdfs-util-test HdfsUtilTest.*) +ADD_UNIFIED_BE_LSAN_TEST(hdr-histogram-test HdrHistogramTest.*) # internal-queue-test has a non-standard main(), so it needs a small amount of thought # to use a unified executable ADD_BE_LSAN_TEST(internal-queue-test) diff --git a/be/src/util/hdr-histogram-test.cc b/be/src/util/hdr-histogram-test.cc new file mode 100644 index 000000000..6fe8be212 --- /dev/null +++ b/be/src/util/hdr-histogram-test.cc @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include "util/hdr-histogram.h" +#include "testutil/gtest-util.h" + +namespace impala { + +class HdrHistogramTest : public testing::Test {}; + +TEST_F(HdrHistogramTest, HistogramTotalSum) { + constexpr uint64_t MAX_VALUE = 10000; + HdrHistogram* histogram = new HdrHistogram(MAX_VALUE, 3); + + histogram->Increment(100); + histogram->IncrementBy(10, 50); + EXPECT_EQ(histogram->TotalSum(), 600); + + histogram->Increment(24000); + EXPECT_EQ(histogram->TotalSum(), 24600); +} + +TEST_F(HdrHistogramTest, HistogramTotalSumIsMovedToNewHdrHistogram) { + constexpr uint64_t MAX_VALUE = 10000; + HdrHistogram* histogram = new HdrHistogram(MAX_VALUE, 3); + + histogram->Increment(100); + EXPECT_EQ(histogram->TotalSum(), 100); + + HdrHistogram* new_histogram = new HdrHistogram(*histogram); + EXPECT_EQ(new_histogram->TotalSum(), 100); + + new_histogram->Increment(200); + EXPECT_EQ(new_histogram->TotalSum(), 300); +} +} // namespace impala diff --git a/be/src/util/hdr-histogram.cc b/be/src/util/hdr-histogram.cc index 1427aac14..0752275b9 100644 --- a/be/src/util/hdr-histogram.cc +++ b/be/src/util/hdr-histogram.cc @@ -46,6 +46,7 @@ HdrHistogram::HdrHistogram(uint64_t highest_trackable_value, int num_significant sub_bucket_half_count_(0), sub_bucket_mask_(0), total_count_(0), + total_sum_(0), min_value_(std::numeric_limits::max()), max_value_(0), counts_(0) { @@ -62,6 +63,7 @@ HdrHistogram::HdrHistogram(const HdrHistogram& other) sub_bucket_half_count_(0), sub_bucket_mask_(0), total_count_(0), + total_sum_(0), min_value_(std::numeric_limits::max()), max_value_(0), counts_(0) { @@ -69,6 +71,7 @@ HdrHistogram::HdrHistogram(const HdrHistogram& other) // Not a consistent snapshot but we try to roughly keep it close. // Copy the min first. + NoBarrier_Store(&total_sum_, NoBarrier_Load(&other.total_sum_)); NoBarrier_Store(&min_value_, NoBarrier_Load(&other.min_value_)); uint64_t total_copied_count = 0; // Copy the counts in order of ascending magnitude. @@ -154,6 +157,7 @@ void HdrHistogram::IncrementBy(int64_t value, int64_t count) { // Increment bucket & total. NoBarrier_AtomicIncrement(&counts_[counts_index], count); NoBarrier_AtomicIncrement(&total_count_, count); + NoBarrier_AtomicIncrement(&total_sum_, value * count); // Update min, if needed. { diff --git a/be/src/util/hdr-histogram.h b/be/src/util/hdr-histogram.h index 3ce5a7b87..ba479adee 100644 --- a/be/src/util/hdr-histogram.h +++ b/be/src/util/hdr-histogram.h @@ -102,6 +102,9 @@ class HdrHistogram { // Count of all events recorded. uint64_t TotalCount() const { return base::subtle::NoBarrier_Load(&total_count_); } + // Sum of all events recorded. + uint64_t TotalSum() const { return base::subtle::NoBarrier_Load(&total_sum_); } + // Return number of items at index. uint64_t CountAt(int bucket_index, int sub_bucket_index) const; @@ -183,6 +186,7 @@ class HdrHistogram { // Also hot. base::subtle::Atomic64 total_count_; + base::subtle::Atomic64 total_sum_; base::subtle::Atomic64 min_value_; base::subtle::Atomic64 max_value_; gscoped_array counts_; diff --git a/be/src/util/histogram-metric.cc b/be/src/util/histogram-metric.cc index 09fade712..21c7a8f5d 100644 --- a/be/src/util/histogram-metric.cc +++ b/be/src/util/histogram-metric.cc @@ -128,7 +128,13 @@ TMetricKind::type HistogramMetric::ToPrometheus( << "\n"; } - *value << name << "_count " << histogram_->TotalCount(); + *value << name << "_count " << histogram_->TotalCount() << "\n"; + + if (IsUnitTimeBased(unit_)) { + *value << name << "_sum " << ConvertToPrometheusSecs(histogram_->TotalSum(), unit_); + } else { + *value << name << "_sum " << histogram_->TotalSum(); + } } *metric_kind << "# TYPE " << name << " summary"; diff --git a/be/src/util/metrics-test.cc b/be/src/util/metrics-test.cc index 8a25f0974..02080fef2 100644 --- a/be/src/util/metrics-test.cc +++ b/be/src/util/metrics-test.cc @@ -744,7 +744,8 @@ TEST_F(MetricsTest, HistogramPrometheus) { "impala_histogram_metric{quantile=\"0.9\"} 9\n" "impala_histogram_metric{quantile=\"0.95\"} 9.496\n" "impala_histogram_metric{quantile=\"0.999\"} 9.984\n" - "impala_histogram_metric_count 10002", + "impala_histogram_metric_count 10002\n" + "impala_histogram_metric_sum 50015", "", "summary"); } @@ -768,7 +769,8 @@ TEST_F(MetricsTest, HistogramTimeNSPrometheus) { "impala_histogram_metric{quantile=\"0.9\"} 9e-06\n" "impala_histogram_metric{quantile=\"0.95\"} 9.496e-06\n" "impala_histogram_metric{quantile=\"0.999\"} 9.984e-06\n" - "impala_histogram_metric_count 10002", + "impala_histogram_metric_count 10002\n" + "impala_histogram_metric_sum 0.050015", "", "summary"); } @@ -792,7 +794,8 @@ TEST_F(MetricsTest, HistogramTimeSPrometheus) { "impala_histogram_metric{quantile=\"0.9\"} 9000\n" "impala_histogram_metric{quantile=\"0.95\"} 9496\n" "impala_histogram_metric{quantile=\"0.999\"} 9984\n" - "impala_histogram_metric_count 10002", + "impala_histogram_metric_count 10002\n" + "impala_histogram_metric_sum 50015001", "", "summary"); } @@ -816,7 +819,8 @@ TEST_F(MetricsTest, HistogramBytesPrometheus) { "impala_histogram_metric{quantile=\"0.9\"} 9000\n" "impala_histogram_metric{quantile=\"0.95\"} 9496\n" "impala_histogram_metric{quantile=\"0.999\"} 9984\n" - "impala_histogram_metric_count 10002", + "impala_histogram_metric_count 10002\n" + "impala_histogram_metric_sum 50015001", "", "summary"); } @@ -840,7 +844,8 @@ TEST_F(MetricsTest, HistogramUnitPrometheus) { "impala_histogram_metric{quantile=\"0.9\"} 9000\n" "impala_histogram_metric{quantile=\"0.95\"} 9496\n" "impala_histogram_metric{quantile=\"0.999\"} 9984\n" - "impala_histogram_metric_count 10002", + "impala_histogram_metric_count 10002\n" + "impala_histogram_metric_sum 50015001", "", "summary"); } -- 2.20.1