From d6ec669b144f40d7dfe98082a115d2485bdd7bb3 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Fri, 17 Oct 2014 14:01:02 -0700 Subject: [PATCH 1/8] Vagrant setup --- Vagrantfile | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ vagrant/README.md | 33 ++++++++++++++++++++++++++++++ vagrant/broker.sh | 34 +++++++++++++++++++++++++++++++ vagrant/zk.sh | 28 +++++++++++++++++++++++++ 4 files changed, 156 insertions(+) create mode 100644 Vagrantfile create mode 100644 vagrant/README.md create mode 100644 vagrant/broker.sh create mode 100644 vagrant/zk.sh diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 index 0000000..a53de78 --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# -*- mode: ruby -*- +# vi: set ft=ruby : + +# Vagrantfile API/syntax version. Don't touch unless you know what you're doing! +VAGRANTFILE_API_VERSION = "2" + +# TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered. +Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| + config.vm.box = "precise64" + + # The url from where the 'config.vm.box' box will be fetched if it + # doesn't already exist on the user's system. 
+ config.vm.box_url = "http://files.vagrantup.com/precise64.box" + + config.vm.define "zookeeper" do |zookeeper| + zookeeper.vm.network :private_network, ip: "192.168.50.5" + zookeeper.vm.provider :virtualbox do |vb| + vb.customize ["modifyvm", :id, "--memory", "512"] + end + zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + end + + config.vm.define "brokerOne" do |brokerOne| + brokerOne.vm.network :private_network, ip: "192.168.50.10" + brokerOne.vm.provider :virtualbox do |vb| + vb.customize ["modifyvm", :id, "--memory", "512"] + end + brokerOne.vm.provision "shell", path: "vagrant/broker.sh", :args => "1" + end + + config.vm.define "brokerTwo" do |brokerTwo| + brokerTwo.vm.network :private_network, ip: "192.168.50.20" + brokerTwo.vm.provider :virtualbox do |vb| + vb.customize ["modifyvm", :id, "--memory", "512"] + end + brokerTwo.vm.provision "shell", path: "vagrant/broker.sh", :args => "2" + end + + config.vm.define "brokerThree" do |brokerThree| + brokerThree.vm.network :private_network, ip: "192.168.50.30" + brokerThree.vm.provider :virtualbox do |vb| + vb.customize ["modifyvm", :id, "--memory", "512"] + end + brokerThree.vm.provision "shell", path: "vagrant/broker.sh", :args => "3" + end + +end diff --git a/vagrant/README.md b/vagrant/README.md new file mode 100644 index 0000000..08d5236 --- /dev/null +++ b/vagrant/README.md @@ -0,0 +1,33 @@ +# Apache Kafka # + +Using Vagrant to get up and running. + +1) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) +2) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) + +In the main kafka folder + +1) ./sbt update +2) ./sbt package +3) ./sbt assembly-package-dependency +4) vagrant up + +once this is done +* Zookeeper will be running 192.168.50.5 +* Broker 1 on 192.168.50.10 +* Broker 2 on 192.168.50.20 +* Broker 3 on 192.168.50.30 + +When you are all up and running you will be back at a command brompt. 
+ +If you want you can login to the machines using vagrant ssh but you don't need to. + +You can access the brokers and zookeeper by their IP + +e.g. + +bin/kafka-topics.sh --create --zookeeper 192.168.50.5:2181 --replication-factor 3 --partitions 1 --topic sandbox + +bin/kafka-console-producer.sh --broker-list 192.168.50.10:9092,192.168.50.20:9092,192.168.50.30:9092 --topic sandbox + +bin/kafka-console-consumer.sh --zookeeper 192.168.50.5:2181 --topic sandbox --from-beginning \ No newline at end of file diff --git a/vagrant/broker.sh b/vagrant/broker.sh new file mode 100644 index 0000000..9be2a34 --- /dev/null +++ b/vagrant/broker.sh @@ -0,0 +1,34 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#!/bin/bash +apt-get -y update +apt-get install -y software-properties-common python-software-properties +add-apt-repository -y ppa:webupd8team/java +apt-get -y update +/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections +apt-get -y install oracle-java7-installer oracle-java7-set-default + +chmod a+rw /opt +cd /opt +ln -s /vagrant kafka +cd kafka +IP=$(ifconfig | grep 'inet addr:'| grep 168 | grep 192|cut -d: -f2 | awk '{ print $1}') +sed 's/broker.id=0/'broker.id=$1'/' /opt/kafka/config/server.properties > /tmp/prop1.tmp +sed 's/#advertised.host.name=/'advertised.host.name=$IP'/' /tmp/prop1.tmp > /tmp/prop2.tmp +sed 's/#host.name=localhost/'host.name=$IP'/' /tmp/prop2.tmp > /tmp/prop3.tmp +sed 's/zookeeper.connect=localhost:2181/'zookeeper.connect=192.168.50.5:2181'/' /tmp/prop3.tmp > /opt/server.properties + +bin/kafka-server-start.sh /opt/server.properties 1>> /tmp/broker.log 2>> /tmp/broker.log & diff --git a/vagrant/zk.sh b/vagrant/zk.sh new file mode 100644 index 0000000..3cc8dfe --- /dev/null +++ b/vagrant/zk.sh @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#!/bin/bash +apt-get -y update +apt-get install -y software-properties-common python-software-properties +add-apt-repository -y ppa:webupd8team/java +apt-get -y update +/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections +apt-get -y install oracle-java7-installer oracle-java7-set-default + +chmod a+rw /opt +cd /opt +ln -s /vagrant kafka +cd kafka +bin/zookeeper-server-start.sh config/zookeeper.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & \ No newline at end of file -- 2.1.2 From e215de22f2990cda17d63a1c7f943c3c7124aeb9 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Tue, 21 Oct 2014 19:28:04 -0700 Subject: [PATCH 2/8] Add basic EC2 support, cleaner Vagrantfile, README cleanup, etc. --- .gitignore | 3 +- Vagrantfile | 90 ++++++++++++++++++++++++++++++++++++++----------------- vagrant/README.md | 63 +++++++++++++++++++++++++++----------- 3 files changed, 110 insertions(+), 46 deletions(-) diff --git a/.gitignore b/.gitignore index 99b32a6..1a3c833 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,5 @@ TAGS .settings .gradle kafka.ipr -kafka.iws \ No newline at end of file +kafka.iws +Vagrantfile.local \ No newline at end of file diff --git a/Vagrantfile b/Vagrantfile index a53de78..fffeb9f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -4,7 +4,7 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software @@ -15,47 +15,81 @@ # -*- mode: ruby -*- # vi: set ft=ruby : +require 'socket' + # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 
VAGRANTFILE_API_VERSION = "2" +# General config +num_brokers = 3 + +# EC2 +ec2_access_key = nil +ec2_secret_key = nil +ec2_keypair_name = nil +ec2_keypair_file = nil + +ec2_region = "us-east-1" +ec2_az = nil # Uses set by AWS +ec2_ami = "ami-9eaa1cf6" +ec2_instance_type = "m3.medium" +ec2_user = "ubuntu" + + +if File.exists?("Vagrantfile.local") then + eval(File.read("Vagrantfile.local"), binding, "Vagrantfile.local") +end + # TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered. Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| - config.vm.box = "precise64" + ## Provider-specific global configs - # The url from where the 'config.vm.box' box will be fetched if it - # doesn't already exist on the user's system. - config.vm.box_url = "http://files.vagrantup.com/precise64.box" + config.vm.provider :virtualbox do |vb,override| + override.vm.box = "ubuntu/trusty64" + vb.customize ["modifyvm", :id, "--memory", "512"] + end - config.vm.define "zookeeper" do |zookeeper| - zookeeper.vm.network :private_network, ip: "192.168.50.5" - zookeeper.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--memory", "512"] - end - zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + config.vm.provider :aws do |aws,override| + # The "box" is specified as an AMI + override.vm.box = "dummy" + override.vm.box_url = "https://github.com/mitchellh/vagrant-aws/raw/master/dummy.box" + + override.ssh.username = ec2_user + override.ssh.private_key_path = ec2_keypair_file + + aws.access_key_id = ec2_access_key + aws.secret_access_key = ec2_secret_key + aws.keypair_name = ec2_keypair_name + + aws.region = ec2_region + aws.availability_zone = ec2_az + aws.instance_type = ec2_instance_type + aws.ami = ec2_ami + + # Exclude some directories that can grow very large from syncing + config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['system_test/', 'logs/'] end - config.vm.define "brokerOne" do |brokerOne| - 
brokerOne.vm.network :private_network, ip: "192.168.50.10" - brokerOne.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--memory", "512"] + def name_node(node, name) + node.vm.provider :aws do |aws| + aws.tags = { 'Name' => "kafka-vagrant-" + Socket.gethostname + "-" + name } end - brokerOne.vm.provision "shell", path: "vagrant/broker.sh", :args => "1" end - config.vm.define "brokerTwo" do |brokerTwo| - brokerTwo.vm.network :private_network, ip: "192.168.50.20" - brokerTwo.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--memory", "512"] - end - brokerTwo.vm.provision "shell", path: "vagrant/broker.sh", :args => "2" + ## Cluster definition + config.vm.define "zookeeper" do |zookeeper| + zookeeper.vm.network :private_network, ip: "192.168.50.5" + zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + name_node(zookeeper, "zookeeper") end - config.vm.define "brokerThree" do |brokerThree| - brokerThree.vm.network :private_network, ip: "192.168.50.30" - brokerThree.vm.provider :virtualbox do |vb| - vb.customize ["modifyvm", :id, "--memory", "512"] + (1..num_brokers).each { |i| + brokerName = "broker" + i.to_s + config.vm.define brokerName do |broker| + broker.vm.network :private_network, ip: "192.168.50." + (50 + i).to_s + broker.vm.provision "shell", path: "vagrant/broker.sh", :args => i.to_s + name_node(broker, brokerName) end - brokerThree.vm.provision "shell", path: "vagrant/broker.sh", :args => "3" - end + } end diff --git a/vagrant/README.md b/vagrant/README.md index 08d5236..932434a 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -2,32 +2,61 @@ Using Vagrant to get up and running. 
-1) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) -2) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) +1) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) +2) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) -In the main kafka folder +In the main Kafka folder -1) ./sbt update -2) ./sbt package -3) ./sbt assembly-package-dependency -4) vagrant up + $ gradle + $ ./gradlew jar + $ vagrant up + +Once this completes: -once this is done * Zookeeper will be running 192.168.50.5 -* Broker 1 on 192.168.50.10 -* Broker 2 on 192.168.50.20 -* Broker 3 on 192.168.50.30 +* Broker 1 on 192.168.50.50 +* Broker 2 on 192.168.50.51 +* Broker 3 on 192.168.50.52 -When you are all up and running you will be back at a command brompt. +When you are all up and running you will be back at a command brompt. If you want you can login to the machines using vagrant ssh but you don't need to. -You can access the brokers and zookeeper by their IP +You can access the brokers and zookeeper by their IP, e.g. + + bin/kafka-topics.sh --create --zookeeper 192.168.50.5:2181 --replication-factor 3 --partitions 1 --topic sandbox + + bin/kafka-console-producer.sh --broker-list 192.168.50.50:9092,192.168.50.51:9092,192.168.50.52:9092 --topic sandbox + + bin/kafka-console-consumer.sh --zookeeper 192.168.50.5:2181 --topic sandbox --from-beginning + +## Configuration ## + +You can override some default settings by specifying the values in +`Vagrantfile.local`. It is interpreted as a Ruby file, although you'll probably +only ever need to change a few simple configuration variables. Some values you +might want to override: + +* `NUM_BROKERS` - Number of broker instances to run + + +## Using Other Providers ## + +EC2 + +Install the `vagrant-aws` plugin to provide EC2 support: -e.g. 
+ $ vagrant plugin install vagrant-aws -bin/kafka-topics.sh --create --zookeeper 192.168.50.5:2181 --replication-factor 3 --partitions 1 --topic sandbox +Next, configure parameters in `Vagrantfile.local`, at a minimum +`ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and +`ec2_security_groups`. Note that you'll have to setup a reasonable security +group yourself. All other settings have reasonable defaults for setting up an +Ubuntu-based cluster, but you may want to customize instance type, region, AMI, +etc. Then start things up, but specify the aws provider: -bin/kafka-console-producer.sh --broker-list 192.168.50.10:9092,192.168.50.20:9092,192.168.50.30:9092 --topic sandbox + $ vagrant up --provider=aws -bin/kafka-console-consumer.sh --zookeeper 192.168.50.5:2181 --topic sandbox --from-beginning \ No newline at end of file +Your instances should get tagged with a name including your hostname to make +them identifiable and make it easier to track instances in the AWS management +console. -- 2.1.2 From 439d069f96a528c0539d924543a8c55e0862f80f Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 22 Oct 2014 15:27:02 -0700 Subject: [PATCH 3/8] Better naming, hostmanager for routable VM names, vagrant-cachier to reduce startup cost, cleanup provisioning scripts, initial support for multiple zookeepers, general cleanup. 
--- .gitignore | 3 +- Vagrantfile | 72 +++++++++++++++++++++++++++++++++++++---------- vagrant/README.md | 84 ++++++++++++++++++++++++++++++++++++++++++------------- vagrant/base.sh | 43 ++++++++++++++++++++++++++++ vagrant/broker.sh | 38 ++++++++++++++----------- vagrant/zk.sh | 18 ++++-------- 6 files changed, 193 insertions(+), 65 deletions(-) create mode 100644 vagrant/base.sh diff --git a/.gitignore b/.gitignore index 1a3c833..45c17cb 100644 --- a/.gitignore +++ b/.gitignore @@ -21,4 +21,5 @@ TAGS .gradle kafka.ipr kafka.iws -Vagrantfile.local \ No newline at end of file +.vagrant +Vagrantfile.local diff --git a/Vagrantfile b/Vagrantfile index fffeb9f..b126054 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -21,6 +21,8 @@ require 'socket' VAGRANTFILE_API_VERSION = "2" # General config +enable_dns = false +num_zookeepers = 1 num_brokers = 3 # EC2 @@ -34,19 +36,41 @@ ec2_az = nil # Uses set by AWS ec2_ami = "ami-9eaa1cf6" ec2_instance_type = "m3.medium" ec2_user = "ubuntu" +ec2_security_groups = nil - -if File.exists?("Vagrantfile.local") then - eval(File.read("Vagrantfile.local"), binding, "Vagrantfile.local") +local_config_file = File.join(File.dirname(__FILE__), "Vagrantfile.local") +if File.exists?(local_config_file) then + eval(File.read(local_config_file), binding, "Vagrantfile.local") end # TODO(ksweeney): RAM requirements are not empirical and can probably be significantly lowered. Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| - ## Provider-specific global configs + config.hostmanager.enabled = true + config.hostmanager.manage_host = enable_dns + config.hostmanager.include_offline = false + ## Provider-specific global configs config.vm.provider :virtualbox do |vb,override| override.vm.box = "ubuntu/trusty64" - vb.customize ["modifyvm", :id, "--memory", "512"] + + override.hostmanager.ignore_private_ip = false + + # Brokers started with the standard script currently set Xms and Xmx to 1G, + # plus we need some extra head room. 
+ vb.customize ["modifyvm", :id, "--memory", "1280"] + + if Vagrant.has_plugin?("vagrant-cachier") + config.cache.scope = :box + # Besides the defaults, we use a custom cache to handle the Oracle JDK + # download, which downloads via wget during an apt install. Because of the + # way the installer ends up using its cache directory, we need to jump + # through some hoops instead of just specifying a cache directly -- we + # share to a temporary location and the provisioning scripts symlink data + # to the right location. + config.cache.enable :generic, { + "oracle-jdk7" => { cache_dir: "/tmp/oracle-jdk7-installer-cache" }, + } + end end config.vm.provider :aws do |aws,override| @@ -54,6 +78,8 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| override.vm.box = "dummy" override.vm.box_url = "https://github.com/mitchellh/vagrant-aws/raw/master/dummy.box" + override.hostmanager.ignore_private_ip = true + override.ssh.username = ec2_user override.ssh.private_key_path = ec2_keypair_file @@ -65,30 +91,46 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| aws.availability_zone = ec2_az aws.instance_type = ec2_instance_type aws.ami = ec2_ami + aws.security_groups = ec2_security_groups # Exclude some directories that can grow very large from syncing config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['system_test/', 'logs/'] end def name_node(node, name) + node.vm.hostname = name node.vm.provider :aws do |aws| aws.tags = { 'Name' => "kafka-vagrant-" + Socket.gethostname + "-" + name } end end ## Cluster definition - config.vm.define "zookeeper" do |zookeeper| - zookeeper.vm.network :private_network, ip: "192.168.50.5" - zookeeper.vm.provision "shell", path: "vagrant/zk.sh" - name_node(zookeeper, "zookeeper") - end + zookeepers = [] + (1..num_zookeepers).each { |i| + name = "zk" + i.to_s + zookeepers.push(name) + config.vm.define name do |zookeeper| + name_node(zookeeper, name) + ip_address = "192.168.50." 
+ (10 + i).to_s + zookeeper.vm.network :private_network, ip: ip_address + zookeeper.vm.provision "shell", path: "vagrant/base.sh" + zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + end + } (1..num_brokers).each { |i| - brokerName = "broker" + i.to_s - config.vm.define brokerName do |broker| - broker.vm.network :private_network, ip: "192.168.50." + (50 + i).to_s - broker.vm.provision "shell", path: "vagrant/broker.sh", :args => i.to_s - name_node(broker, brokerName) + name = "broker" + i.to_s + config.vm.define name do |broker| + name_node(broker, name) + ip_address = "192.168.50." + (50 + i).to_s + broker.vm.network :private_network, ip: ip_address + # We need to be careful about what we list as the publicly routable + # address since this is registered in ZK and handed out to clients. If + # host DNS isn't setup, we shouldn't use hostnames -- IP addresses must be + # used to support clients running on the host. + zookeeper_connect = zookeepers.map{ |zk_addr| zk_addr + ":2181"}.join(",") + broker.vm.provision "shell", path: "vagrant/base.sh" + broker.vm.provision "shell", path: "vagrant/broker.sh", :args => [i.to_s, enable_dns ? name : ip_address, zookeeper_connect] end } diff --git a/vagrant/README.md b/vagrant/README.md index 932434a..4cd4dc4 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -2,33 +2,65 @@ Using Vagrant to get up and running. 
-1) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) -2) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) +1) Install Virtual Box [https://www.virtualbox.org/](https://www.virtualbox.org/) +2) Install Vagrant [http://www.vagrantup.com/](http://www.vagrantup.com/) +3) Install Vagrant Plugins: -In the main Kafka folder + # Required + $ vagrant plugin install vagrant-hostmanager + # Optional + $ vagrant plugin install vagrant-cachier # Caches & shares package downloads across VMs + +In the main Kafka folder, do a normal Kafka build: $ gradle $ ./gradlew jar + +Configuration will be discussed below, but one setting you likely want to enable +in Vagrantfile.local is `enable_dns = true` to put hostnames in the host's +/etc/hosts file. You probably want this to avoid having to use IP addresses when +addressing the cluster from outside the VMs, e.g. if you run a client on the +host. It's disabled by default since it requires `sudo` access, mucks with your +system state, and breaks with naming conflicts if you try to run multiple +clusters concurrently. + +Now bring up the cluster: + $ vagrant up Once this completes: -* Zookeeper will be running 192.168.50.5 -* Broker 1 on 192.168.50.50 -* Broker 2 on 192.168.50.51 -* Broker 3 on 192.168.50.52 +* Zookeeper will be running on 192.168.50.11 (and `zk1` if you used enable_dns) +* Broker 1 on 192.168.50.51 (and `broker1` if you used enable_dns) +* Broker 2 on 192.168.50.52 (and `broker2` if you used enable_dns) +* Broker 3 on 192.168.50.53 (and `broker3` if you used enable_dns) + +To log into one of the machines: + + vagrant ssh <machine-name> -When you are all up and running you will be back at a command brompt. +You can access the brokers and zookeeper by their IP or hostname, e.g. -If you want you can login to the machines using vagrant ssh but you don't need to. 
+ # With IP + bin/kafka-topics.sh --create --zookeeper 192.168.50.11:2181 --replication-factor 3 --partitions 1 --topic sandbox -You can access the brokers and zookeeper by their IP, e.g. + # With hostname + bin/kafka-console-producer.sh --broker-list broker1:9092,broker2:9092,broker3:9092 --topic sandbox - bin/kafka-topics.sh --create --zookeeper 192.168.50.5:2181 --replication-factor 3 --partitions 1 --topic sandbox + bin/kafka-console-consumer.sh --zookeeper zk1:2181 --topic sandbox --from-beginning - bin/kafka-console-producer.sh --broker-list 192.168.50.50:9092,192.168.50.51:9092,192.168.50.52:9092 --topic sandbox +If you need to update the running cluster, you can re-run the provisioner (the +step that installs software and configures services): - bin/kafka-console-consumer.sh --zookeeper 192.168.50.5:2181 --topic sandbox --from-beginning + vagrant provision + +Note that this doesn't currently ensure a fresh start -- old cluster state will +still remain intact after everything restarts. This can be useful for updating +the cluster to your most recent development version. + +Finally, you can clean up the cluster by destroying all the VMs: + + vagrant destroy ## Configuration ## @@ -37,23 +69,35 @@ You can override some default settings by specifying the values in only ever need to change a few simple configuration variables. Some values you might want to override: -* `NUM_BROKERS` - Number of broker instances to run +* `enable_dns` - Register each VM with a hostname in /etc/hosts on the + hosts. Hostnames are always set in the /etc/hosts in the VMs, so this is only + necessary if you want to address them conveniently from the host for tasks + that aren't provided by Vagrant. 
+* `num_zookeepers` - Size of zookeeper cluster +* `num_brokers` - Number of broker instances to run ## Using Other Providers ## -EC2 +### EC2 ### Install the `vagrant-aws` plugin to provide EC2 support: $ vagrant plugin install vagrant-aws Next, configure parameters in `Vagrantfile.local`, at a minimum -`ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and -`ec2_security_groups`. Note that you'll have to setup a reasonable security -group yourself. All other settings have reasonable defaults for setting up an -Ubuntu-based cluster, but you may want to customize instance type, region, AMI, -etc. Then start things up, but specify the aws provider: +`enable_dns`, `ec2_access_key`, `ec2_secret_key`, `ec2_keypair_name`, `ec2_keypair_file`, and +`ec2_security_groups`. A couple of important notes: + +1. You definitely want to use `enable_dns` if you plan to run clients outside of + the cluster (e.g. from your local host). If you don't, you'll need to go + lookup `vagrant ssh-config`. + +2. You'll have to setup a reasonable security group yourself. All other settings + have reasonable defaults for setting up an Ubuntu-based cluster, but you may + want to customize instance type, region, AMI, etc. + +Now start things up, but specify the aws provider: $ vagrant up --provider=aws diff --git a/vagrant/base.sh b/vagrant/base.sh new file mode 100644 index 0000000..6f28dfe --- /dev/null +++ b/vagrant/base.sh @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#!/bin/bash + +set -e + +if [ -z `which javac` ]; then + apt-get -y update + apt-get install -y software-properties-common python-software-properties + add-apt-repository -y ppa:webupd8team/java + apt-get -y update + + # Try to share cache. See Vagrantfile for details + mkdir -p /var/cache/oracle-jdk7-installer + if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then + find /tmp/oracle-jdk7-installer-cache/ -not -empty -exec cp '{}' /var/cache/oracle-jdk7-installer/ \; + fi + + /bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections + apt-get -y install oracle-java7-installer oracle-java7-set-default + + if [ -e "/tmp/oracle-jdk7-installer-cache/" ]; then + cp -R /var/cache/oracle-jdk7-installer/* /tmp/oracle-jdk7-installer-cache + fi +fi + +chmod a+rw /opt +if [ ! -e /opt/kafka ]; then + ln -s /vagrant /opt/kafka +fi diff --git a/vagrant/broker.sh b/vagrant/broker.sh index 9be2a34..63f2d4f 100644 --- a/vagrant/broker.sh +++ b/vagrant/broker.sh @@ -4,7 +4,7 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software @@ -14,21 +14,25 @@ # limitations under the License. 
#!/bin/bash -apt-get -y update -apt-get install -y software-properties-common python-software-properties -add-apt-repository -y ppa:webupd8team/java -apt-get -y update -/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections -apt-get -y install oracle-java7-installer oracle-java7-set-default -chmod a+rw /opt -cd /opt -ln -s /vagrant kafka -cd kafka -IP=$(ifconfig | grep 'inet addr:'| grep 168 | grep 192|cut -d: -f2 | awk '{ print $1}') -sed 's/broker.id=0/'broker.id=$1'/' /opt/kafka/config/server.properties > /tmp/prop1.tmp -sed 's/#advertised.host.name=/'advertised.host.name=$IP'/' /tmp/prop1.tmp > /tmp/prop2.tmp -sed 's/#host.name=localhost/'host.name=$IP'/' /tmp/prop2.tmp > /tmp/prop3.tmp -sed 's/zookeeper.connect=localhost:2181/'zookeeper.connect=192.168.50.5:2181'/' /tmp/prop3.tmp > /opt/server.properties +# Usage: broker.sh <broker ID> <public hostname or IP> <ZooKeeper connect string, host:port[,host:port...]> + +set -e + +BROKER_ID=$1 +PUBLIC_ADDRESS=$2 +PUBLIC_ZOOKEEPER_ADDRESSES=$3 + +cd /opt/kafka + +sed \ + -e 's/broker.id=0/'broker.id=$BROKER_ID'/' \ + -e 's/#advertised.host.name=/'advertised.host.name=$PUBLIC_ADDRESS'/' \ + -e 's/zookeeper.connect=localhost:2181/'zookeeper.connect=$PUBLIC_ZOOKEEPER_ADDRESSES'/' \ + /opt/kafka/config/server.properties > /opt/kafka/config/server-$BROKER_ID.properties -bin/kafka-server-start.sh /opt/server.properties 1>> /tmp/broker.log 2>> /tmp/broker.log & +echo "Killing server" +bin/kafka-server-stop.sh || true +sleep 5 # Because kafka-server-stop.sh doesn't actually wait +echo "Starting server" +bin/kafka-server-start.sh /opt/kafka/config/server-$BROKER_ID.properties 1>> /tmp/broker.log 2>> /tmp/broker.log & diff --git a/vagrant/zk.sh b/vagrant/zk.sh index 3cc8dfe..291379e 100644 --- a/vagrant/zk.sh +++ b/vagrant/zk.sh @@ -4,7 +4,7 @@ # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. 
You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software @@ -14,15 +14,9 @@ # limitations under the License. #!/bin/bash -apt-get -y update -apt-get install -y software-properties-common python-software-properties -add-apt-repository -y ppa:webupd8team/java -apt-get -y update -/bin/echo debconf shared/accepted-oracle-license-v1-1 select true | /usr/bin/debconf-set-selections -apt-get -y install oracle-java7-installer oracle-java7-set-default -chmod a+rw /opt -cd /opt -ln -s /vagrant kafka -cd kafka -bin/zookeeper-server-start.sh config/zookeeper.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & \ No newline at end of file +set -e + +cd /opt/kafka + +bin/zookeeper-server-start.sh config/zookeeper.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & -- 2.1.2 From 6e1f03f2bce574cb9a544e8217109af7959b3058 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 22 Oct 2014 16:21:15 -0700 Subject: [PATCH 4/8] Don't sync a few directories that aren't actually required on the server. --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index b126054..399ec1f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -94,7 +94,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| aws.security_groups = ec2_security_groups # Exclude some directories that can grow very large from syncing - config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['system_test/', 'logs/'] + config.vm.synced_folder ".", "/vagrant", type: "rsync", :rsync_excludes => ['.git', 'core/data/', 'logs/', 'system_test/'] end def name_node(node, name) -- 2.1.2 From 6b7bd31a46b5e8de6fa026a12f88024fb59f3dcc Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 22 Oct 2014 16:28:43 -0700 Subject: [PATCH 5/8] Add generic worker node support. 
--- Vagrantfile | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/Vagrantfile b/Vagrantfile index 399ec1f..4b0518f 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -24,6 +24,7 @@ VAGRANTFILE_API_VERSION = "2" enable_dns = false num_zookeepers = 1 num_brokers = 3 +num_workers = 1 # Generic workers that get the code, but don't start any services # EC2 ec2_access_key = nil @@ -134,4 +135,14 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| end } + (1..num_workers).each { |i| + name = "worker" + i.to_s + config.vm.define name do |worker| + name_node(worker, name) + ip_address = "192.168.50." + (100 + i).to_s + worker.vm.network :private_network, ip: ip_address + worker.vm.provision "shell", path: "vagrant/base.sh" + end + } + end -- 2.1.2 From c1f1d22bdb4e70e09d9cb15da26babdb3f89a283 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Wed, 22 Oct 2014 19:14:20 -0700 Subject: [PATCH 6/8] Default # of workers should be 0 --- Vagrantfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Vagrantfile b/Vagrantfile index 4b0518f..9063c71 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -24,7 +24,7 @@ VAGRANTFILE_API_VERSION = "2" enable_dns = false num_zookeepers = 1 num_brokers = 3 -num_workers = 1 # Generic workers that get the code, but don't start any services +num_workers = 0 # Generic workers that get the code, but don't start any services # EC2 ec2_access_key = nil -- 2.1.2 From 5a5dd571d9146b06cb66d05444470e400521dd63 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Thu, 23 Oct 2014 18:04:51 -0700 Subject: [PATCH 7/8] Add support for Zookeeper clusters. This requires us to split up allocating VMs and provisioning because Vagrant will run the provisioner for the first node before all nodes are allocated. This leaves the first node running Zookeeper with unroutable peer hostnames which it, for some reason, caches as unroutable. 
The cluster never properly finishes forming since the nodes are unable to open connections to nodes booted later than they were. The simple solution is to make sure all nodes are booted before starting configuration so we have all the addresses and hostnames available and routable. --- Vagrantfile | 2 +- vagrant/README.md | 6 +++++- vagrant/zk.sh | 22 +++++++++++++++++++++- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 9063c71..598cdb4 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -115,7 +115,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| ip_address = "192.168.50." + (10 + i).to_s zookeeper.vm.network :private_network, ip: ip_address zookeeper.vm.provision "shell", path: "vagrant/base.sh" - zookeeper.vm.provision "shell", path: "vagrant/zk.sh" + zookeeper.vm.provision "shell", path: "vagrant/zk.sh", :args => [i.to_s, num_zookeepers] end } diff --git a/vagrant/README.md b/vagrant/README.md index 4cd4dc4..cf79a2e 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -26,7 +26,11 @@ clusters concurrently. Now bring up the cluster: - $ vagrant up + $ vagrant up --no-provision && vagrant provision + +We separate out the two steps (bringing up the base VMs and configuring them) +due to current limitations in ZooKeeper (ZOOKEEPER-1506) that require us to +collect IPs for all nodes before starting ZooKeeper nodes. 
Once this completes: diff --git a/vagrant/zk.sh b/vagrant/zk.sh index 291379e..15517f8 100644 --- a/vagrant/zk.sh +++ b/vagrant/zk.sh @@ -15,8 +15,28 @@ #!/bin/bash +# Usage: zk.sh ZKID NUM_ZK + set -e +ZKID=$1 +NUM_ZK=$2 + cd /opt/kafka -bin/zookeeper-server-start.sh config/zookeeper.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & +cp /opt/kafka/config/zookeeper.properties /opt/kafka/config/zookeeper-$ZKID.properties +echo "initLimit=5" >> /opt/kafka/config/zookeeper-$ZKID.properties +echo "syncLimit=2" >> /opt/kafka/config/zookeeper-$ZKID.properties +echo "quorumListenOnAllIPs=true" >> /opt/kafka/config/zookeeper-$ZKID.properties +for i in `seq 1 $NUM_ZK`; do + echo "server.${i}=zk${i}:2888:3888" >> /opt/kafka/config/zookeeper-$ZKID.properties +done + +mkdir -p /tmp/zookeeper +echo "$ZKID" > /tmp/zookeeper/myid + +echo "Killing ZooKeeper" +bin/zookeeper-server-stop.sh || true +sleep 5 # Because zookeeper-server-stop.sh doesn't actually wait +echo "Starting ZooKeeper" +bin/zookeeper-server-start.sh config/zookeeper-$ZKID.properties 1>> /tmp/zk.log 2>> /tmp/zk.log & -- 2.1.2 From 002e9cba2932b3af98797873fce8a71c8d3a1824 Mon Sep 17 00:00:00 2001 From: Ewen Cheslack-Postava Date: Fri, 7 Nov 2014 10:03:40 -0800 Subject: [PATCH 8/8] Fix AWS provider commands in Vagrant README. --- vagrant/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vagrant/README.md b/vagrant/README.md index cf79a2e..1cc98f6 100644 --- a/vagrant/README.md +++ b/vagrant/README.md @@ -103,7 +103,7 @@ Next, configure parameters in `Vagrantfile.local`, at a minimum Now start things up, but specify the aws provider: - $ vagrant up --provider=aws + $ vagrant up --provider=aws --no-provision && vagrant provision Your instances should get tagged with a name including your hostname to make them identifiable and make it easier to track instances in the AWS management -- 2.1.2