diff --git a/perf/README.md b/perf/README.md new file mode 100644 index 0000000..7e6ecef --- /dev/null +++ b/perf/README.md @@ -0,0 +1,38 @@ +# Apache Kafka + +## Performance testing + +To run the Kafka performance tests you can update the server.properties to dump the metrics of the test to CSV files. + +Add this to your brokers' server.properties file, setting the values appropriate for your environment. + + kafka.metrics.polling.interval.secs=5 + kafka.metrics.reporters=kafka.metrics.KafkaCSVMetricsReporter + kafka.csv.metrics.dir=/tmp/kafka_metrics + kafka.csv.metrics.reporter.enabled=true + +Before running the tests decide on what settings make the test valid for your implementation for producers and consumers. + + bin/kafka-producer-perf-test.sh --help + bin/kafka-consumer-perf-test.sh --help + +You may want to review the wiki page on performance testing https://cwiki.apache.org/confluence/display/KAFKA/Performance+testing for more detail about this or review the help that each test provides from the command line. + +After you have run your tests and you have your CSV results you can now graph the metrics using the `draw-performance-graphs.r` script. + +You need to have R http://www.r-project.org/ installed and X11 too (for Mac users this is no longer shipped by Apple but available here http://xquartz.macosforge.org/landing/) + +Once these are installed then you can go to the folder where your metrics CSV files were created (e.g. /tmp/kafka_metrics) and then run the script (assuming you have kafka cloned e.g. /opt/apache/kafka) + + cd /tmp/kafka_metrics + Rscript /opt/apache/kafka/perf/draw-performance-graphs.r TRUE Produce-RequestsPerSec.csv + +This will generate an image of all metrics in one png and also a png for each metric separately (metrics being total count, total mean, 1min, 5min, 15min moving averages) + +If you want to generate a graph for more than one CSV file then add it as another argument to the command line. 
+ + Rscript /opt/apache/kafka/perf/draw-performance-graphs.r TRUE Produce-RequestsPerSec.csv test-MessagesInPerSec.csv + +If you want to generate pngs for every CSV in the directory then run + + Rscript /opt/apache/kafka/perf/draw-performance-graphs.r FALSE diff --git a/perf/draw-performance-graphs.r b/perf/draw-performance-graphs.r new file mode 100644 index 0000000..bbaacb1 --- /dev/null +++ b/perf/draw-performance-graphs.r @@ -0,0 +1,73 @@ +#!/usr/bin/env Rscript +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#needed functions

# Open a PNG device writing to `file`, run `draw()`, and close the device.
# The device is closed through on.exit() so a failing plot call cannot leave
# it open.
.withPng <- function(file, width, height, draw) {
  png(file, width = width, height = height)
  on.exit(dev.off(), add = TRUE)
  draw()
  invisible(file)
}

# Draw one series as a line chart against `time` on the active device.
.plotSeries <- function(topicName, time, values, label) {
  plot(time, values, xlab = "time", ylab = label,
       main = paste(topicName, label, sep = " "), type = "l")
}

# Plot every non-time column of `data` against its time column.
#
# Writes "<topicName>.png" with all series stacked vertically (one 800x800
# panel per series) and one "<topicName>_<column>.png" per series.
#
# topicName: prefix for the generated PNG file names and plot titles.
# data:      data frame with at least one column whose name contains "time".
#            The first such column is the x axis; every column whose name
#            contains "time" is excluded from the plotted series.
#
# Returns NULL invisibly. Stops if no time column exists; warns and writes
# nothing if there is no series left to plot.
plotAndSummarize <- function(topicName, data) {
  timeIndex <- grep("time", colnames(data))
  if (length(timeIndex) == 0L) {
    stop("Missing time arg.")
  }
  time <- data[, timeIndex[1]]
  # drop = FALSE keeps a data frame (and the column name) even when only a
  # single non-time column remains.
  metrics <- data[, -timeIndex, drop = FALSE]
  metricNames <- colnames(metrics)
  numCols <- length(metricNames)
  if (numCols == 0L) {
    warning("No metric columns to plot for ", topicName, call. = FALSE)
    return(invisible(NULL))
  }

  # Combined image: one panel per series.
  .withPng(paste0(topicName, ".png"), 800, 800 * numCols, function() {
    par(mfrow = c(numCols, 1))
    for (i in seq_len(numCols)) {
      .plotSeries(topicName, time, metrics[[i]], metricNames[i])
    }
  })

  # One image per series.
  for (i in seq_len(numCols)) {
    .withPng(
      paste0(topicName, "_", metricNames[i], ".png"), 800, 800,
      function() {
        .plotSeries(topicName, time, metrics[[i]], metricNames[i])
      }
    )
  }
  invisible(NULL)
}

# Read the CSV file at `path` (with a header row) and plot its contents via
# plotAndSummarize(), using `topicName` as the output name prefix.
# Stops with "Missing time arg." when no column name contains "time".
testingMetrics <- function(topicName, path) {
  data <- read.csv(path, header = TRUE, sep = ",")
  if (!any(grepl("time", colnames(data)))) {
    stop("Missing time arg.")
  }
  plotAndSummarize(topicName, data)
}

# Strip the trailing file extension from each element of `files`.
# Only the last extension is removed, so dots inside the name and any leading
# directory part are kept ("my.topic.csv" -> "my.topic", "./x.csv" -> "./x").
# Returns a character vector of the same length as `files`.
strippedName <- function(files) {
  tools::file_path_sans_ext(files)
}

# Command-line driver.
#
# args[1] is a TRUE/FALSE flag:
#   TRUE  - plot the CSV files named in the remaining arguments;
#   FALSE - plot every CSV file in the current working directory.
# With no arguments at all the FALSE behaviour is used.
# Files whose name does not end in ".csv" (any letter case) are skipped.
main <- function(args) {
  usage <- paste(
    "Usage: Rscript draw-performance-graphs.r TRUE <file.csv> [<file.csv> ...]",
    "       Rscript draw-performance-graphs.r FALSE",
    sep = "\n"
  )

  if (length(args) == 0L) {
    explicitFiles <- FALSE
  } else {
    explicitFiles <- as.logical(args[1])
    if (is.na(explicitFiles)) {
      stop("First argument must be TRUE or FALSE.\n", usage, call. = FALSE)
    }
  }

  if (explicitFiles) {
    files <- args[-1]
    if (length(files) == 0L) {
      stop("No CSV files given.\n", usage, call. = FALSE)
    }
  } else {
    files <- list.files() #assume r is running in the same directory as the files
  }

  # Escaped dot and end anchor: only a real ".csv" suffix counts.
  isCsv <- grepl("\\.csv$", files, ignore.case = TRUE)
  if (explicitFiles && any(!isCsv)) {
    message("Skipping non-CSV argument(s): ",
            paste(files[!isCsv], collapse = ", "))
  }

  csvFiles <- files[isCsv]
  fileNames <- strippedName(csvFiles)
  for (i in seq_along(csvFiles)) {
    testingMetrics(fileNames[i], csvFiles[i])
  }
  invisible(NULL)
}

# Run the driver only when this file is executed as a script (top level has
# no enclosing call frames); source()-ing the file just defines the functions.
if (sys.nframe() == 0L) {
  main(commandArgs(TRUE))
}