From 43a039bfcf9ca0cb7ad04ac568fb9cc95e11ee71 Mon Sep 17 00:00:00 2001 From: Guillem Nieto Date: Fri, 13 Sep 2019 11:51:54 +0200 Subject: [PATCH] IMPALA-8946: Fix histogram rendering to Prometheus Prometheus has two formats that allows to calculate quantiles (on client side): Histograms and Summaries. The former requires to pick a set of buckets, while the latter only requires to specifiy which quantiles (percentiles) the application wants to provide. The difference between summaries and histograms on Prometheus can be found here: https://prometheus.io/docs/practices/histograms/ Previous to this changes, Impala histograms were rendered as Prometheus histograms, but it was calculating percentiles instead of bucketing the recorded values, which is not what Prometheus clients expects. From now on, Impala histograms will be rendered as Prometheus summaries instead, which expects quantiles. --- be/src/util/histogram-metric.cc | 44 ++++++++---------- be/src/util/metrics-test.cc | 80 +++++++++++++++------------------ 2 files changed, 53 insertions(+), 71 deletions(-) diff --git a/be/src/util/histogram-metric.cc b/be/src/util/histogram-metric.cc index 012711dfc..09fade712 100644 --- a/be/src/util/histogram-metric.cc +++ b/be/src/util/histogram-metric.cc @@ -80,66 +80,58 @@ TMetricKind::type HistogramMetric::ToPrometheus( // check if unit its 'TIME_MS','TIME_US' or 'TIME_NS' and convert it to seconds, // this is because prometheus only supports time format in seconds if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.2\"} " + *value << name << "{quantile=\"0.2\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(25), unit_) << "\n"; } else { - *value << name << "{le=\"0.2\"} " << histogram_->ValueAtPercentile(25) << "\n"; + *value << name << "{quantile=\"0.2\"} " << histogram_->ValueAtPercentile(25) + << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.5\"} " + *value << name << "{quantile=\"0.5\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(50), unit_) << "\n"; } else { - *value << name << "{le=\"0.5\"} " << histogram_->ValueAtPercentile(50) << "\n"; + *value << name << "{quantile=\"0.5\"} " << histogram_->ValueAtPercentile(50) + << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.7\"} " + *value << name << "{quantile=\"0.7\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(75), unit_) << "\n"; } else { - *value << name << "{le=\"0.7\"} " << histogram_->ValueAtPercentile(75) << "\n"; + *value << name << "{quantile=\"0.7\"} " << histogram_->ValueAtPercentile(75) + << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.9\"} " + *value << name << "{quantile=\"0.9\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(90), unit_) << "\n"; } else { - *value << name << "{le=\"0.9\"} " << histogram_->ValueAtPercentile(90) << "\n"; + *value << name << "{quantile=\"0.9\"} " << histogram_->ValueAtPercentile(90) + << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.95\"} " + *value << name << "{quantile=\"0.95\"} " << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(95), unit_) << "\n"; } else { - *value << name << "{le=\"0.95\"} " << histogram_->ValueAtPercentile(95) << "\n"; - } - - if (IsUnitTimeBased(unit_)) { - *value << name << "{le=\"0.999\"} " - << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(99.9), unit_) + *value << name << "{quantile=\"0.95\"} " << histogram_->ValueAtPercentile(95) << "\n"; - } else { - *value << name << "{le=\"0.999\"} " << histogram_->ValueAtPercentile(99.9) << "\n"; } if (IsUnitTimeBased(unit_)) { - *value << name << "_max " << ConvertToPrometheusSecs(histogram_->MaxValue(), unit_) + *value << name << "{quantile=\"0.999\"} " + << ConvertToPrometheusSecs(histogram_->ValueAtPercentile(99.9), unit_) << "\n"; } else { - *value << name << "_max " << histogram_->MaxValue() << "\n"; - } - - if (IsUnitTimeBased(unit_)) { - *value << name << "_min " << ConvertToPrometheusSecs(histogram_->MinValue(), unit_) + *value << name << "{quantile=\"0.999\"} " << histogram_->ValueAtPercentile(99.9) << "\n"; - } else { - *value << name << "_min " << histogram_->MinValue() << "\n"; } *value << name << "_count " << histogram_->TotalCount(); } - *metric_kind << "# TYPE " << name << " histogram"; + *metric_kind << "# TYPE " << name << " summary"; return TMetricKind::HISTOGRAM; } diff --git a/be/src/util/metrics-test.cc b/be/src/util/metrics-test.cc index 92fc08f02..8a25f0974 100644 --- a/be/src/util/metrics-test.cc +++ b/be/src/util/metrics-test.cc @@ -738,16 +738,14 @@ TEST_F(MetricsTest, HistogramPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2.5\n" - "impala_histogram_metric{le=\"0.5\"} 5\n" - "impala_histogram_metric{le=\"0.7\"} 7.5\n" - "impala_histogram_metric{le=\"0.9\"} 9\n" - "impala_histogram_metric{le=\"0.95\"} 9.496\n" - "impala_histogram_metric{le=\"0.999\"} 9.984\n" - "impala_histogram_metric_max 10.001\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2.5\n" + "impala_histogram_metric{quantile=\"0.5\"} 5\n" + "impala_histogram_metric{quantile=\"0.7\"} 7.5\n" + "impala_histogram_metric{quantile=\"0.9\"} 9\n" + "impala_histogram_metric{quantile=\"0.95\"} 9.496\n" + "impala_histogram_metric{quantile=\"0.999\"} 9.984\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, HistogramTimeNSPrometheus) { @@ -764,16 +762,14 @@ TEST_F(MetricsTest, HistogramTimeNSPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2.5e-06\n" - "impala_histogram_metric{le=\"0.5\"} 5e-06\n" - "impala_histogram_metric{le=\"0.7\"} 7.5e-06\n" - "impala_histogram_metric{le=\"0.9\"} 9e-06\n" - "impala_histogram_metric{le=\"0.95\"} 9.496e-06\n" - "impala_histogram_metric{le=\"0.999\"} 9.984e-06\n" - "impala_histogram_metric_max 1.0001e-05\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2.5e-06\n" + "impala_histogram_metric{quantile=\"0.5\"} 5e-06\n" + "impala_histogram_metric{quantile=\"0.7\"} 7.5e-06\n" + "impala_histogram_metric{quantile=\"0.9\"} 9e-06\n" + "impala_histogram_metric{quantile=\"0.95\"} 9.496e-06\n" + "impala_histogram_metric{quantile=\"0.999\"} 9.984e-06\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, HistogramTimeSPrometheus) { @@ -790,16 +786,14 @@ TEST_F(MetricsTest, HistogramTimeSPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2500\n" - "impala_histogram_metric{le=\"0.5\"} 5000\n" - "impala_histogram_metric{le=\"0.7\"} 7500\n" - "impala_histogram_metric{le=\"0.9\"} 9000\n" - "impala_histogram_metric{le=\"0.95\"} 9496\n" - "impala_histogram_metric{le=\"0.999\"} 9984\n" - "impala_histogram_metric_max 10001\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2500\n" + "impala_histogram_metric{quantile=\"0.5\"} 5000\n" + "impala_histogram_metric{quantile=\"0.7\"} 7500\n" + "impala_histogram_metric{quantile=\"0.9\"} 9000\n" + "impala_histogram_metric{quantile=\"0.95\"} 9496\n" + "impala_histogram_metric{quantile=\"0.999\"} 9984\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, HistogramBytesPrometheus) { @@ -816,16 +810,14 @@ TEST_F(MetricsTest, HistogramBytesPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2500\n" - "impala_histogram_metric{le=\"0.5\"} 5000\n" - "impala_histogram_metric{le=\"0.7\"} 7500\n" - "impala_histogram_metric{le=\"0.9\"} 9000\n" - "impala_histogram_metric{le=\"0.95\"} 9496\n" - "impala_histogram_metric{le=\"0.999\"} 9984\n" - "impala_histogram_metric_max 10001\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2500\n" + "impala_histogram_metric{quantile=\"0.5\"} 5000\n" + "impala_histogram_metric{quantile=\"0.7\"} 7500\n" + "impala_histogram_metric{quantile=\"0.9\"} 9000\n" + "impala_histogram_metric{quantile=\"0.95\"} 9496\n" + "impala_histogram_metric{quantile=\"0.999\"} 9984\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, HistogramUnitPrometheus) { @@ -842,16 +834,14 @@ TEST_F(MetricsTest, HistogramUnitPrometheus) { std::stringstream val; metrics.ToPrometheus(true, &val); AssertPrometheus(val, "impala_histogram_metric", - "impala_histogram_metric{le=\"0.2\"} 2500\n" - "impala_histogram_metric{le=\"0.5\"} 5000\n" - "impala_histogram_metric{le=\"0.7\"} 7500\n" - "impala_histogram_metric{le=\"0.9\"} 9000\n" - "impala_histogram_metric{le=\"0.95\"} 9496\n" - "impala_histogram_metric{le=\"0.999\"} 9984\n" - "impala_histogram_metric_max 10001\n" - "impala_histogram_metric_min 0\n" + "impala_histogram_metric{quantile=\"0.2\"} 2500\n" + "impala_histogram_metric{quantile=\"0.5\"} 5000\n" + "impala_histogram_metric{quantile=\"0.7\"} 7500\n" + "impala_histogram_metric{quantile=\"0.9\"} 9000\n" + "impala_histogram_metric{quantile=\"0.95\"} 9496\n" + "impala_histogram_metric{quantile=\"0.999\"} 9984\n" "impala_histogram_metric_count 10002", - "", "histogram"); + "", "summary"); } TEST_F(MetricsTest, MetricGroupPrometheus) { -- 2.20.1