From caeb82f0da3c782dfefa1c87d8129a009a591f8f Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Fri, 17 Oct 2014 14:01:02 -0700 Subject: [PATCH 1/9] Vagrant setup --- Vagrantfile | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ vagrant/README.md | 33 ++++++++++++++++++++++++++++++ vagrant/broker.sh | 34 +++++++++++++++++++++++++++++++ vagrant/zk.sh | 28 +++++++++++++++++++++++++ 4 files changed, 156 insertions(+) create mode 100644 Vagrantfile create mode 100644 vagrant/README.md create mode 100644 vagrant/broker.sh create mode 100644 vagrant/zk.sh diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 index 0000000..a53de78 --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -*- mode: ruby -*- +# vi: set ft=ruby : + +# Vagrantfile API/syntax version. Don't touch unless you know what you're doing! +VAGRANTFILE_API_VERSION = "2" + +# TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered. +Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| + config.vm.box = "precise64" + + # The url from where the 'config.vm.box' box will be fetched if it + # doesn't already exist on the user's system. 
+ config.vm.box_url = "http://files.vagrantup.com/precise64.box" + + config.vm.define "zookeeper" do |zookeeper| + zookeeper.vm.network :private_network, ip: "192.168.50.5" + zookeeper.vm.provider :virtualbox do |vb| + vb.customize ["modifyvm", :id, "--memory", "512"] + end + zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + end + + config.vm.define "brokerOne" do |brokerOne| + brokerOne.vm.network :private_network, ip: "192.168.50.10" + brokerOne.vm.provider :virtualbox do |vb| + vb.customize ["modifyvm", :id, "--memory", "512"] + end + brokerOne.vm.provision "shell", path: "vagrant/broker.sh", :args => "1" + end + + config.vm.define "brokerTwo" do |brokerTwo| + brokerTwo.vm.network :private_network, ip: "192.168.50.20" + brokerTwo.vm.provider :virtualbox do |vb| + vb.customize ["modifyvm", :id, "--memory", "512"] + end + brokerTwo.vm.provision "shell", path: "vagrant/broker.sh", :args => "2" + end + + config.vm.define "brokerThree" do |brokerThree| + brokerThree.vm.network :private_network, ip: "192.168.50.30" + brokerThree.vm.provider :virtualbox do |vb| + vb.customize ["modifyvm", :id, "--memory", "512"] + end + brokerThree.vm.provision "shell", path: "vagrant/broker.sh", :args => "3" + end + +end diff --git a/vagrant/README.md b/vagrant/README.md new file mode 100644 index 0000000..08d5236 --- /dev/null +++ b/vagrant/README.md @@ -0,0 +1,33 @@ +# Apache Kafka # + +Using Vagrant to get up and running. + +1) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) +2) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) + +In the main kafka folder + +1) ./sbt update +2) ./sbt package +3) ./sbt assembly-package-dependency +4) vagrant up + +once this is done +* Zookeeper will be running 192.168.50.5 +* Broker 1 on 192.168.50.10 +* Broker 2 on 192.168.50.20 +* Broker 3 on 192.168.50.30 + +When you are all up and running you will be back at a command brompt. 
+ +If you want you can login to the machines using vagrant ssh but you don't need to. + +You can access the brokers and zookeeper by their IP + +e.g. + +bin/kafka-topics.sh --create --zookeeper 192.168.50.5:2181 --replication-factor 3 --partitions 1 --topic sandbox + +bin/kafka-console-producer.sh --broker-list 192.168.50.10:9092,192.168.50.20:9092,192.168.50.30:9092 --topic sandbox + +bin/kafka-console-consumer.sh --zookeeper 192.168.50.5:2181 --topic sandbox --from-beginning \ No newline at end of file diff --git a/vagrant/broker.sh b/vagrant/broker.sh new file mode 100644 index 0000000..9be2a34 --- /dev/null +++ b/vagrant/broker.sh @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#!/bin/bash +apt-get -y update +apt-get install -y software-properties-common python-software-properties +add-apt-repository -y ppa:webupd8team/java +apt-get -y update +/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections +apt-get -y install oracle-java7-installer oracle-java7-set-default + +chmod a+rw /opt +cd /opt +ln -s /vagrant kafka +cd kafka +IP=$(ifconfig | grep 'inet addr:'| grep 168 | grep 192|cut -d: -f2 | awk '{ print $1}') +sed 's/broker.id=0/'broker.id=$1'/' /opt/kafka/config/server.properties > /tmp/prop1.tmp +sed 's/#advertised.host.name=/'advertised.host.name=$IP'/' /tmp/prop1.tmp > /tmp/prop2.tmp +sed 's/#host.name=localhost/'host.name=$IP'/' /tmp/prop2.tmp > /tmp/prop3.tmp +sed 's/zookeeper.connect=localhost:2181/'zookeeper.connect=192.168.50.5:2181'/' /tmp/prop3.tmp > /opt/server.properties + +bin/kafka-server-start.sh /opt/server.properties 1>> /tmp/broker.log 2>> /tmp/broker.log & diff --git a/vagrant/zk.sh b/vagrant/zk.sh new file mode 100644 index 0000000..3cc8dfe --- /dev/null +++ b/vagrant/zk.sh @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#!/bin/bash +apt-get -y update +apt-get install -y software-properties-common python-software-properties +add-apt-repository -y ppa:webupd8team/java +apt-get -y update +/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections +apt-get -y install oracle-java7-installer oracle-java7-set-default + +chmod a+rw /opt +cd /opt +ln -s /vagrant kafka +cd kafka +bin/zookeeper-server-start.sh config/zookeeper.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & \ No newline at end of file -- 2.1.2 From a9289c5f9239f016339b1366f8032fcbc7d12441 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Tue, 21 Oct 2014 19:28:04 -0700 Subject: [PATCH 2/9] Add basic EC2 support, cleaner Vagrantfile, README cleanup, etc. --- .gitignore | 3 +- Vagrantfile | 90 ++++++++++++++++++++++++++++++++++++++----------------- vagrant/README.md | 63 +++++++++++++++++++++++++++----------- 3 files changed, 110 insertions(+), 46 deletions(-) diff --git a/.gitignore b/.gitignore index 99b32a6..1a3c833 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,5 @@ TAGS .settings .gradle kafka.ipr -kafka.iws \ No newline at end of file +kafka.iws +Vagrantfile.local \ No newline at end of file diff --git a/Vagrantfile b/Vagrantfile index a53de78..fffeb9f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -4,7 +4,7 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software @@ -15,47 +15,81 @@ # -*- mode: ruby -*- # vi: set ft=ruby : +require 'socket' + # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 
VAGRANTFILE_API_VERSION = "2" +# General config +num_brokers = 3 + +# EC2 +ec2_access_key = nil +ec2_secret_key = nil +ec2_keypair_name = nil +ec2_keypair_file = nil + +ec2_region = "us-east-1" +ec2_az = nil # Uses set by AWS +ec2_ami = "ami-9eaa1cf6" +ec2_instance_type = "m3.medium" +ec2_user = "ubuntu" + + +if File.exists?("Vagrantfile.local") then + eval(File.read("Vagrantfile.local"), binding, "Vagrantfile.local") +end + # TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered. Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| - config.vm.box = "precise64" + ## Provider-specific global configs - # The url from where the 'config.vm.box' box will be fetched if it - # doesn't already exist on the user's system. - config.vm.box_url = "http://files.vagrantup.com/precise64.box" + config.vm.provider :virtualbox do |vb,override| + override.vm.box = "ubuntu/trusty64" + vb.customize ["modifyvm", :id, "--memory", "512"] + end - config.vm.define "zookeeper" do |zookeeper| - zookeeper.vm.network :private_network, ip: "192.168.50.5" - zookeeper.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--memory", "512"] - end - zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + config.vm.provider :aws do |aws,override| + # The "box" is specified as an AMI + override.vm.box = "dummy" + override.vm.box_url = "https://github.com/mitchellh/vagrant-aws/raw/master/dummy.box" + + override.ssh.username = ec2_user + override.ssh.private_key_path = ec2_keypair_file + + aws.access_key_id = ec2_access_key + aws.secret_access_key = ec2_secret_key + aws.keypair_name = ec2_keypair_name + + aws.region = ec2_region + aws.availability_zone = ec2_az + aws.instance_type = ec2_instance_type + aws.ami = ec2_ami + + # Exclude some directories that can grow very large from syncing + config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['system_test/', 'logs/'] end - config.vm.define "brokerOne" do |brokerOne| - 
brokerOne.vm.network :private_network, ip: "192.168.50.10" - brokerOne.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--memory", "512"] + def name_node(node, name) + node.vm.provider :aws do |aws| + aws.tags = { 'Name' => "kafka-vagrant-" + Socket.gethostname + "-" + name } end - brokerOne.vm.provision "shell", path: "vagrant/broker.sh", :args => "1" end - config.vm.define "brokerTwo" do |brokerTwo| - brokerTwo.vm.network :private_network, ip: "192.168.50.20" - brokerTwo.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--memory", "512"] - end - brokerTwo.vm.provision "shell", path: "vagrant/broker.sh", :args => "2" + ## Cluster definition + config.vm.define "zookeeper" do |zookeeper| + zookeeper.vm.network :private_network, ip: "192.168.50.5" + zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + name_node(zookeeper, "zookeeper") end - config.vm.define "brokerThree" do |brokerThree| - brokerThree.vm.network :private_network, ip: "192.168.50.30" - brokerThree.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--memory", "512"] + (1..num_brokers).each { |i| + brokerName = "broker" + i.to_s + config.vm.define brokerName do |broker| + broker.vm.network :private_network, ip: "192.168.50." + (50 + i).to_s + broker.vm.provision "shell", path: "vagrant/broker.sh", :args => i.to_s + name_node(broker, brokerName) end - brokerThree.vm.provision "shell", path: "vagrant/broker.sh", :args => "3" - end + } end diff --git a/vagrant/README.md b/vagrant/README.md index 08d5236..932434a 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -2,32 +2,61 @@ Using Vagrant to get up and running. 
-1) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) -2) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) +1) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) +2) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) -In the main kafka folder +In the main Kafka folder -1) ./sbt update -2) ./sbt package -3) ./sbt assembly-package-dependency -4) vagrant up + $ gradle + $ ./gradlew jar + $ vagrant up + +Once this completes: -once this is done * Zookeeper will be running 192.168.50.5 -* Broker 1 on 192.168.50.10 -* Broker 2 on 192.168.50.20 -* Broker 3 on 192.168.50.30 +* Broker 1 on 192.168.50.50 +* Broker 2 on 192.168.50.51 +* Broker 3 on 192.168.50.52 -When you are all up and running you will be back at a command brompt. +When you are all up and running you will be back at a command brompt. If you want you can login to the machines using vagrant ssh but you don't need to. -You can access the brokers and zookeeper by their IP +You can access the brokers and zookeeper by their IP, e.g. + + bin/kafka-topics.sh --create --zookeeper 192.168.50.5:2181 --replication-factor 3 --partitions 1 --topic sandbox + + bin/kafka-console-producer.sh --broker-list 192.168.50.50:9092,192.168.50.51:9092,192.168.50.52:9092 --topic sandbox + + bin/kafka-console-consumer.sh --zookeeper 192.168.50.5:2181 --topic sandbox --from-beginning + +## Configuration ## + +You can override some default settings by specifying the values in +`Vagrantfile.local`. It is interpreted as a Ruby file, although you'll probably +only ever need to change a few simple configuration variables. Some values you +might want to override: + +* `NUM_BROKERS` - Number of broker instances to run + + +## Using Other Providers ## + +EC2 + +Install the `vagrant-aws` plugin to provide EC2 support: -e.g. 
+ $ vagrant plugin install vagrant-aws -bin/kafka-topics.sh --create --zookeeper 192.168.50.5:2181 --replication-factor 3 --partitions 1 --topic sandbox +Next, configure parameters in `Vagrantfile.local`, at a minimum +`ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and +`ec2_security_groups`. Note that you'll have to setup a reasonable security +group yourself. All other settings have reasonable defaults for setting up an +Ubuntu-based cluster, but you may want to customize instance type, region, AMI, +etc. Then start things up, but specify the aws provider: -bin/kafka-console-producer.sh --broker-list 192.168.50.10:9092,192.168.50.20:9092,192.168.50.30:9092 --topic sandbox + $ vagrant up --provider=aws -bin/kafka-console-consumer.sh --zookeeper 192.168.50.5:2181 --topic sandbox --from-beginning \ No newline at end of file +Your instances should get tagged with a name including your hostname to make +them identifiable and make it easier to track instances in the AWS management +console. -- 2.1.2 From b6fd70c2141dbd94fc69186ce60e0524ebccc5bf Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 22 Oct 2014 15:27:02 -0700 Subject: [PATCH 3/9] Better naming, hostmanager for routable VM names, vagrant-cachier to reduce startup cost, cleanup provisioning scripts, initial support for multiple zookeepers, general cleanup. 
--- .gitignore | 3 +- Vagrantfile | 72 +++++++++++++++++++++++++++++++++++++---------- vagrant/README.md | 84 ++++++++++++++++++++++++++++++++++++++++++------------- vagrant/base.sh | 43 ++++++++++++++++++++++++++++ vagrant/broker.sh | 38 ++++++++++++++----------- vagrant/zk.sh | 18 ++++-------- 6 files changed, 193 insertions(+), 65 deletions(-) create mode 100644 vagrant/base.sh diff --git a/.gitignore b/.gitignore index 1a3c833..45c17cb 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,5 @@ TAGS .gradle kafka.ipr kafka.iws -Vagrantfile.local \ No newline at end of file +.vagrant +Vagrantfile.local diff --git a/Vagrantfile b/Vagrantfile index fffeb9f..b126054 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -21,6 +21,8 @@ require 'socket' VAGRANTFILE_API_VERSION = "2" # General config +enable_dns = false +num_zookeepers = 1 num_brokers = 3 # EC2 @@ -34,19 +36,41 @@ ec2_az = nil # Uses set by AWS ec2_ami = "ami-9eaa1cf6" ec2_instance_type = "m3.medium" ec2_user = "ubuntu" +ec2_security_groups = nil - -if File.exists?("Vagrantfile.local") then - eval(File.read("Vagrantfile.local"), binding, "Vagrantfile.local") +local_config_file = File.join(File.dirname(__FILE__), "Vagrantfile.local") +if File.exists?(local_config_file) then + eval(File.read(local_config_file), binding, "Vagrantfile.local") end # TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered. Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| - ## Provider-specific global configs + config.hostmanager.enabled = true + config.hostmanager.manage_host = enable_dns + config.hostmanager.include_offline = false + ## Provider-specific global configs config.vm.provider :virtualbox do |vb,override| override.vm.box = "ubuntu/trusty64" - vb.customize ["modifyvm", :id, "--memory", "512"] + + override.hostmanager.ignore_private_ip = false + + # Brokers started with the standard script currently set Xms and Xmx to 1G, + # plus we need some extra head room. 
+ vb.customize ["modifyvm", :id, "--memory", "1280"] + + if Vagrant.has_plugin?("vagrant-cachier") + config.cache.scope = :box + # Besides the defaults, we use a custom cache to handle the Oracle JDK + # download, which downloads via wget during an apt install. Because of the + # way the installer ends up using its cache directory, we need to jump + # through some hoops instead of just specifying a cache directly -- we + # share to a temporary location and the provisioning scripts symlink data + # to the right location. + config.cache.enable :generic, { + "oracle-jdk7" => { cache_dir: "/tmp/oracle-jdk7-installer-cache" }, + } + end end config.vm.provider :aws do |aws,override| @@ -54,6 +78,8 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| override.vm.box = "dummy" override.vm.box_url = "https://github.com/mitchellh/vagrant-aws/raw/master/dummy.box" + override.hostmanager.ignore_private_ip = true + override.ssh.username = ec2_user override.ssh.private_key_path = ec2_keypair_file @@ -65,30 +91,46 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| aws.availability_zone = ec2_az aws.instance_type = ec2_instance_type aws.ami = ec2_ami + aws.security_groups = ec2_security_groups # Exclude some directories that can grow very large from syncing config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['system_test/', 'logs/'] end def name_node(node, name) + node.vm.hostname = name node.vm.provider :aws do |aws| aws.tags = { 'Name' => "kafka-vagrant-" + Socket.gethostname + "-" + name } end end ## Cluster definition - config.vm.define "zookeeper" do |zookeeper| - zookeeper.vm.network :private_network, ip: "192.168.50.5" - zookeeper.vm.provision "shell", path: "vagrant/zk.sh" - name_node(zookeeper, "zookeeper") - end + zookeepers = [] + (1..num_zookeepers).each { |i| + name = "zk" + i.to_s + zookeepers.push(name) + config.vm.define name do |zookeeper| + name_node(zookeeper, name) + ip_address = "192.168.50." 
+ (10 + i).to_s + zookeeper.vm.network :private_network, ip: ip_address + zookeeper.vm.provision "shell", path: "vagrant/base.sh" + zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + end + } (1..num_brokers).each { |i| - brokerName = "broker" + i.to_s - config.vm.define brokerName do |broker| - broker.vm.network :private_network, ip: "192.168.50." + (50 + i).to_s - broker.vm.provision "shell", path: "vagrant/broker.sh", :args => i.to_s - name_node(broker, brokerName) + name = "broker" + i.to_s + config.vm.define name do |broker| + name_node(broker, name) + ip_address = "192.168.50." + (50 + i).to_s + broker.vm.network :private_network, ip: ip_address + # We need to be careful about what we list as the publicly routable + # address since this is registered in ZK and handed out to clients. If + # host DNS isn't setup, we shouldn't use hostnames -- IP addresses must be + # used to support clients running on the host. + zookeeper_connect = zookeepers.map{ |zk_addr| zk_addr + ":2181"}.join(",") + broker.vm.provision "shell", path: "vagrant/base.sh" + broker.vm.provision "shell", path: "vagrant/broker.sh", :args => [i.to_s, enable_dns ? name : ip_address, zookeeper_connect] end } diff --git a/vagrant/README.md b/vagrant/README.md index 932434a..4cd4dc4 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -2,33 +2,65 @@ Using Vagrant to get up and running. 
-1) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) -2) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) +1) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) +2) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) +3) Install Vagrant Plugins: -In the main Kafka folder + # Required + $ vagrant plugin install vagrant-hostmanager + # Optional + $ vagrant plugin install vagrant-cachier # Caches & shares package downloads across VMs + +In the main Kafka folder, do a normal Kafka build: $ gradle $ ./gradlew jar + +Configuration will be discussed below, but one setting you likely want to enable +in Vagrantfile.local is `enable_dns = true` to put hostnames in the host's +/etc/hosts file. You probably want this to avoid having to use IP addresses when +addressing the cluster from outside the VMs, e.g. if you run a client on the +host. It's disabled by default since it requires `sudo` access, mucks with your +system state, and breaks with naming conflicts if you try to run multiple +clusters concurrently. + +Now bring up the cluster: + $ vagrant up Once this completes: -* Zookeeper will be running 192.168.50.5 -* Broker 1 on 192.168.50.50 -* Broker 2 on 192.168.50.51 -* Broker 3 on 192.168.50.52 +* Zookeeper will be running on 192.168.50.11 (and `zk1` if you used enable_dns) +* Broker 1 on 192.168.50.51 (and `broker1` if you used enable_dns) +* Broker 2 on 192.168.50.52 (and `broker2` if you used enable_dns) +* Broker 3 on 192.168.50.53 (and `broker3` if you used enable_dns) + +To log into one of the machines: + + vagrant ssh -When you are all up and running you will be back at a command brompt. +You can access the brokers and zookeeper by their IP or hostname, e.g. -If you want you can login to the machines using vagrant ssh but you don't need to. 
+ # With IP + bin/kafka-topics.sh --create --zookeeper 192.168.50.11:2181 --replication-factor 3 --partitions 1 --topic sandbox -You can access the brokers and zookeeper by their IP, e.g. + # With hostname + bin/kafka-console-producer.sh --broker-list broker1:9092,broker2:9092,broker3:9092 --topic sandbox - bin/kafka-topics.sh --create --zookeeper 192.168.50.5:2181 --replication-factor 3 --partitions 1 --topic sandbox + bin/kafka-console-consumer.sh --zookeeper zk1:2181 --topic sandbox --from-beginning - bin/kafka-console-producer.sh --broker-list 192.168.50.50:9092,192.168.50.51:9092,192.168.50.52:9092 --topic sandbox +If you need to update the running cluster, you can re-run the provisioner (the +step that installs software and configures services): - bin/kafka-console-consumer.sh --zookeeper 192.168.50.5:2181 --topic sandbox --from-beginning + vagrant provision + +Note that this doesn't currently ensure a fresh start -- old cluster state will +still remain intact after everything restarts. This can be useful for updating +the cluster to your most recent development version. + +Finally, you can clean up the cluster by destroying all the VMs: + + vagrant destroy ## Configuration ## @@ -37,23 +69,35 @@ You can override some default settings by specifying the values in only ever need to change a few simple configuration variables. Some values you might want to override: -* `NUM_BROKERS` - Number of broker instances to run +* `enable_dns` - Register each VM with a hostname in /etc/hosts on the + hosts. Hostnames are always set in the /etc/hosts in the VMs, so this is only + necessary if you want to address them conveniently from the host for tasks + that aren't provided by Vagrant. 
+* `num_zookeepers` - Size of zookeeper cluster +* `num_brokers` - Number of broker instances to run ## Using Other Providers ## -EC2 +### EC2 ### Install the `vagrant-aws` plugin to provide EC2 support: $ vagrant plugin install vagrant-aws Next, configure parameters in `Vagrantfile.local`, at a minimum -`ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and -`ec2_security_groups`. Note that you'll have to setup a reasonable security -group yourself. All other settings have reasonable defaults for setting up an -Ubuntu-based cluster, but you may want to customize instance type, region, AMI, -etc. Then start things up, but specify the aws provider: +`enable_dns`, `ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and +`ec2_security_groups`. A couple of important notes: + +1. You definitely want to use `enable_dns` if you plan to run clients outside of + the cluster (e.g. from your local host). If you don't, you'll need to go + look up `vagrant ssh-config`. + +2. You'll have to set up a reasonable security group yourself. All other settings + have reasonable defaults for setting up an Ubuntu-based cluster, but you may + want to customize instance type, region, AMI, etc. + +Now start things up, but specify the aws provider: $ vagrant up --provider=aws diff --git a/vagrant/base.sh b/vagrant/base.sh new file mode 100644 index 0000000..6f28dfe --- /dev/null +++ b/vagrant/base.sh @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash + +set -e + +if [ -z `which javac` ]; then + apt-get -y update + apt-get install -y software-properties-common python-software-properties + add-apt-repository -y ppa:webupd8team/java + apt-get -y update + + # Try to share cache. See Vagrantfile for details + mkdir -p /var/cache/oracle-jdk7-installer + if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then + find /tmp/oracle-jdk7-installer-cache/ -not -empty -exec cp '{}' /var/cache/oracle-jdk7-installer/ \; + fi + + /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections + apt-get -y install oracle-java7-installer oracle-java7-set-default + + if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then + cp -R /var/cache/oracle-jdk7-installer/* /tmp/oracle-jdk7-installer-cache + fi +fi + +chmod a+rw /opt +if [ ! -e /opt/kafka ]; then + ln -s /vagrant /opt/kafka +fi diff --git a/vagrant/broker.sh b/vagrant/broker.sh index 9be2a34..63f2d4f 100644 --- a/vagrant/broker.sh +++ b/vagrant/broker.sh @@ -4,7 +4,7 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software @@ -14,21 +14,25 @@ # limitations under the License. 
#!/bin/bash -apt-get -y update -apt-get install -y software-properties-common python-software-properties -add-apt-repository -y ppa:webupd8team/java -apt-get -y update -/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections -apt-get -y install oracle-java7-installer oracle-java7-set-default -chmod a+rw /opt -cd /opt -ln -s /vagrant kafka -cd kafka -IP=$(ifconfig | grep 'inet addr:'| grep 168 | grep 192|cut -d: -f2 | awk '{ print $1}') -sed 's/broker.id=0/'broker.id=$1'/' /opt/kafka/config/server.properties > /tmp/prop1.tmp -sed 's/#advertised.host.name=/'advertised.host.name=$IP'/' /tmp/prop1.tmp > /tmp/prop2.tmp -sed 's/#host.name=localhost/'host.name=$IP'/' /tmp/prop2.tmp > /tmp/prop3.tmp -sed 's/zookeeper.connect=localhost:2181/'zookeeper.connect=192.168.50.5:2181'/' /tmp/prop3.tmp > /opt/server.properties +# Usage: broker.sh BROKER_ID PUBLIC_ADDRESS PUBLIC_ZOOKEEPER_ADDRESSES + +set -e + +BROKER_ID=$1 +PUBLIC_ADDRESS=$2 +PUBLIC_ZOOKEEPER_ADDRESSES=$3 + +cd /opt/kafka + +sed \ + -e 's/broker.id=0/'broker.id=$BROKER_ID'/' \ + -e 's/#advertised.host.name=/'advertised.host.name=$PUBLIC_ADDRESS'/' \ + -e 's/zookeeper.connect=localhost:2181/'zookeeper.connect=$PUBLIC_ZOOKEEPER_ADDRESSES'/' \ + /opt/kafka/config/server.properties > /opt/kafka/config/server-$BROKER_ID.properties -bin/kafka-server-start.sh /opt/server.properties 1>> /tmp/broker.log 2>> /tmp/broker.log & +echo "Killing server" +bin/kafka-server-stop.sh || true +sleep 5 # Because kafka-server-stop.sh doesn't actually wait +echo "Starting server" +bin/kafka-server-start.sh /opt/kafka/config/server-$BROKER_ID.properties 1>> /tmp/broker.log 2>> /tmp/broker.log & diff --git a/vagrant/zk.sh b/vagrant/zk.sh index 3cc8dfe..291379e 100644 --- a/vagrant/zk.sh +++ b/vagrant/zk.sh @@ -4,7 +4,7 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License.
You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software @@ -14,15 +14,9 @@ # limitations under the License. #!/bin/bash -apt-get -y update -apt-get install -y software-properties-common python-software-properties -add-apt-repository -y ppa:webupd8team/java -apt-get -y update -/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections -apt-get -y install oracle-java7-installer oracle-java7-set-default -chmod a+rw /opt -cd /opt -ln -s /vagrant kafka -cd kafka -bin/zookeeper-server-start.sh config/zookeeper.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & \ No newline at end of file +set -e + +cd /opt/kafka + +bin/zookeeper-server-start.sh config/zookeeper.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & -- 2.1.2 From 974b605536ca42ab5f202527fb83aae9c4e2dea4 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 22 Oct 2014 16:21:15 -0700 Subject: [PATCH 4/9] Don't sync a few directories that aren't actually required on the server. --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index b126054..399ec1f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -94,7 +94,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| aws.security_groups = ec2_security_groups # Exclude some directories that can grow very large from syncing - config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['system_test/', 'logs/'] + config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['.git', 'core/data/', 'logs/', 'system_test/'] end def name_node(node, name) -- 2.1.2 From fb8e427fdbbebb90d4618623dae5e5719afc12d3 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 22 Oct 2014 16:28:43 -0700 Subject: [PATCH 5/9] Add generic worker node support. 
--- Vagrantfile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Vagrantfile b/Vagrantfile index 399ec1f..4b0518f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -24,6 +24,7 @@ VAGRANTFILE_API_VERSION = "2" enable_dns = false num_zookeepers = 1 num_brokers = 3 +num_workers = 1 # Generic workers that get the code, but don't start any services # EC2 ec2_access_key = nil @@ -134,4 +135,14 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| end } + (1..num_workers).each { |i| + name = "worker" + i.to_s + config.vm.define name do |worker| + name_node(worker, name) + ip_address = "192.168.50." + (100 + i).to_s + worker.vm.network :private_network, ip: ip_address + worker.vm.provision "shell", path: "vagrant/base.sh" + end + } + end -- 2.1.2 From b1867d057ba3c27eeb91dd06a0f5e4da835a941e Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 22 Oct 2014 19:14:20 -0700 Subject: [PATCH 6/9] Default # of workers should be 0 --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index 4b0518f..9063c71 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -24,7 +24,7 @@ VAGRANTFILE_API_VERSION = "2" enable_dns = false num_zookeepers = 1 num_brokers = 3 -num_workers = 1 # Generic workers that get the code, but don't start any services +num_workers = 0 # Generic workers that get the code, but don't start any services # EC2 ec2_access_key = nil -- 2.1.2 From 36cb47550165da2bc4673c0ef6a85e5202390acd Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Thu, 23 Oct 2014 18:04:51 -0700 Subject: [PATCH 7/9] Add support for Zookeeper clusters. This requires us to split up allocating VMs and provisioning because Vagrant will run the provisioner for the first node before all nodes are allocated. This leaves the first node running Zookeeper with unroutable peer hostnames which it, for some reason, caches as unroutable. 
The cluster never properly finishes forming since the nodes are unable to open connections to nodes booted later than they were. The simple solution is to make sure all nodes are booted before starting configuration so we have all the addresses and hostnames available and routable. --- Vagrantfile | 2 +- vagrant/README.md | 6 +++++- vagrant/zk.sh | 22 +++++++++++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 9063c71..598cdb4 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -115,7 +115,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| ip_address = "192.168.50." + (10 + i).to_s zookeeper.vm.network :private_network, ip: ip_address zookeeper.vm.provision "shell", path: "vagrant/base.sh" - zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + zookeeper.vm.provision "shell", path: "vagrant/zk.sh", :args => [i.to_s, num_zookeepers] end } diff --git a/vagrant/README.md b/vagrant/README.md index 4cd4dc4..cf79a2e 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -26,7 +26,11 @@ clusters concurrently. Now bring up the cluster: - $ vagrant up + $ vagrant up --no-provision && vagrant provision + +We separate out the two steps (bringing up the base VMs and configuring them) +due to current limitations in ZooKeeper (ZOOKEEPER-1506) that require us to +collect IPs for all nodes before starting ZooKeeper nodes. 
Once this completes: diff --git a/vagrant/zk.sh b/vagrant/zk.sh index 291379e..15517f8 100644 --- a/vagrant/zk.sh +++ b/vagrant/zk.sh @@ -15,8 +15,28 @@ #!/bin/bash +# Usage: zk.sh <zkid> <num_zk> + set -e +ZKID=$1 +NUM_ZK=$2 + cd /opt/kafka -bin/zookeeper-server-start.sh config/zookeeper.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & +cp /opt/kafka/config/zookeeper.properties /opt/kafka/config/zookeeper-$ZKID.properties +echo "initLimit=5" >> /opt/kafka/config/zookeeper-$ZKID.properties +echo "syncLimit=2" >> /opt/kafka/config/zookeeper-$ZKID.properties +echo "quorumListenOnAllIPs=true" >> /opt/kafka/config/zookeeper-$ZKID.properties +for i in `seq 1 $NUM_ZK`; do + echo "server.${i}=zk${i}:2888:3888" >> /opt/kafka/config/zookeeper-$ZKID.properties +done + +mkdir -p /tmp/zookeeper +echo "$ZKID" > /tmp/zookeeper/myid + +echo "Killing ZooKeeper" +bin/zookeeper-server-stop.sh || true +sleep 5 # Because zookeeper-server-stop.sh doesn't actually wait +echo "Starting ZooKeeper" +bin/zookeeper-server-start.sh config/zookeeper-$ZKID.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & -- 2.1.2 From 5b86cdb648465386517ba0f5816ed221d0906674 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Fri, 7 Nov 2014 10:03:40 -0800 Subject: [PATCH 8/9] Fix AWS provider commands in Vagrant README.
--- vagrant/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vagrant/README.md b/vagrant/README.md index cf79a2e..1cc98f6 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -103,7 +103,7 @@ Next, configure parameters in `Vagrantfile.local`, at a minimum Now start things up, but specify the aws provider: - $ vagrant up --provider=aws + $ vagrant up --provider=aws --no-provision && vagrant provision Your instances should get tagged with a name including your hostname to make them identifiable and make it easier to track instances in the AWS management -- 2.1.2 From 213de9405bc555da1c031d1b72a9627336da96bc Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Tue, 11 Nov 2014 13:49:50 -0800 Subject: [PATCH 9/9] Addressing Joe's comments. --- .gitignore | 3 +++ README.md | 4 ++++ Vagrantfile | 7 ++++--- vagrant/README.md | 29 ++++++++++++++++++++--------- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/.gitignore b/.gitignore index 45c17cb..06a6418 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,6 @@ kafka.ipr kafka.iws .vagrant Vagrantfile.local + +config/server-* +config/zookeeper-* diff --git a/README.md b/README.md index 9aca906..11dfdf9 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,10 @@ Please note for this to work you should create/update `~/.gradle/gradle.properti ### Determining how transitive dependencies are added ### ./gradlew core:dependencies --configuration runtime +### Running in Vagrant ### + +See [vagrant/README.md](vagrant/README.md). + ### Contribution ### Apache Kafka is interested in building the community; we would welcome any thoughts or [patches](https://issues.apache.org/jira/browse/KAFKA). You can reach us [on the Apache mailing lists](http://kafka.apache.org/contact.html). 
diff --git a/Vagrantfile b/Vagrantfile index 598cdb4..3a77b6f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -25,10 +25,11 @@ enable_dns = false num_zookeepers = 1 num_brokers = 3 num_workers = 0 # Generic workers that get the code, but don't start any services +ram_megabytes = 1280 # EC2 -ec2_access_key = nil -ec2_secret_key = nil +ec2_access_key = ENV['AWS_ACCESS_KEY'] +ec2_secret_key = ENV['AWS_SECRET_KEY'] ec2_keypair_name = nil ec2_keypair_file = nil @@ -58,7 +59,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| # Brokers started with the standard script currently set Xms and Xmx to 1G, # plus we need some extra head room. - vb.customize ["modifyvm", :id, "--memory", "1280"] + vb.customize ["modifyvm", :id, "--memory", ram_megabytes.to_s] if Vagrant.has_plugin?("vagrant-cachier") config.cache.scope = :box diff --git a/vagrant/README.md b/vagrant/README.md index 1cc98f6..f6a8f76 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -3,7 +3,7 @@ Using Vagrant to get up and running. 1) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) -2) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) +2) Install Vagrant >= 1.6.4 [http://www.vagrantup.com/](http://www.vagrantup.com/) 3) Install Vagrant Plugins: # Required @@ -16,8 +16,10 @@ In the main Kafka folder, do a normal Kafka build: $ gradle $ ./gradlew jar -Configuration will be discussed below, but one setting you likely want to enable -in Vagrantfile.local is `enable_dns = true` to put hostnames in the host's +You can override default settings in `Vagrantfile.local`, which is a Ruby file +that is ignored by git and imported into the Vagrantfile. +One setting you likely want to enable +in `Vagrantfile.local` is `enable_dns = true` to put hostnames in the host's /etc/hosts file. You probably want this to avoid having to use IP addresses when addressing the cluster from outside the VMs, e.g. if you run a client on the host. 
It's disabled by default since it requires `sudo` access, mucks with your @@ -45,12 +47,13 @@ To log into one of the machines: You can access the brokers and zookeeper by their IP or hostname, e.g. - # With IP + # Specify ZooKeeper node 1 by its IP: 192.168.50.11 bin/kafka-topics.sh --create --zookeeper 192.168.50.11:2181 --replication-factor 3 --partitions 1 --topic sandbox - # With hostname + # Specify brokers by their hostnames: broker1, broker2, broker3 bin/kafka-console-producer.sh --broker-list broker1:9092,broker2:9092,broker3:9092 --topic sandbox + # Specify ZooKeeper node by its hostname: zk1 bin/kafka-console-consumer.sh --zookeeper zk1:2181 --topic sandbox --from-beginning If you need to update the running cluster, you can re-run the provisioner (the @@ -89,7 +92,7 @@ Install the `vagrant-aws` plugin to provide EC2 support: $ vagrant plugin install vagrant-aws -Next, configure parameters in `Vagrantfile.local`, at a minimum +Next, configure parameters in `Vagrantfile.local`. A few are *required*: `enable_dns`, `ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and `ec2_security_groups`. A couple of important notes: @@ -97,9 +100,17 @@ Next, configure parameters in `Vagrantfile.local`, at a minimum the cluster (e.g. from your local host). If you don't, you'll need to go lookup `vagrant ssh-config`. -2. You'll have to setup a reasonable security group yourself. All other settings - have reasonable defaults for setting up an Ubuntu-based cluster, but you may - want to customize instance type, region, AMI, etc. +2. You'll have to set up a reasonable security group yourself. You'll need to + open ports for ZooKeeper (2888 & 3888 between ZK nodes, 2181 for clients) and + Kafka (9092). Beware that opening these ports to all sources (e.g. so you can + run producers/consumers locally) will allow anyone to access your Kafka + cluster.
All other settings have reasonable defaults for setting up an + Ubuntu-based cluster, but you may want to customize instance type, region, + AMI, etc. + +3. `ec2_access_key` and `ec2_secret_key` will use the environment variables + `AWS_ACCESS_KEY` and `AWS_SECRET_KEY` respectively if they are set and not + overridden in `Vagrantfile.local`. Now start things up, but specify the aws provider: -- 2.1.2