Index: src/assembly/all.xml =================================================================== --- src/assembly/all.xml (revision 1209240) +++ src/assembly/all.xml (working copy) @@ -77,6 +77,19 @@ update-hbase-env.sh + + src/packages/templates/conf + share/hbase/templates/conf + + + src/packages + sbin + 755 + + hbase-setup-conf.sh + update-hbase-env.sh + + Index: src/docbkx/getting_started.xml =================================================================== --- src/docbkx/getting_started.xml (revision 1209240) +++ src/docbkx/getting_started.xml (working copy) @@ -65,7 +65,33 @@ At this point, you are ready to start HBase. But before starting - it, you might want to edit conf/hbase-site.xml and + it, you might want to configure HBase. + + +
+ Setup HBase + HBase provides a post-installation setup script. The script is + designed to run as the root user to set up directory and file permissions. + This script is designed to configure HBase to work with Hadoop. For + running standalone HBase, skip to the next section. + + $ sudo hbase-setup-conf.sh --hadoop-home=/usr \ + --hadoop-conf=/etc/hadoop \ + --hadoop-namenode=localhost \ + --hbase-conf=/etc/hbase \ + --hbase-log=/var/log/hbase \ + --hbase-pid=/var/run/hbase \ + --java-home=/usr/java/default \ + --regionservers=localhost \ + --dfs-client-read-shortcircuit=false \ + --dfs-client-read-shortcircuit-skip-checksum=false + + +
+ +
+ Customize HBase Configuration + Edit conf/hbase-site.xml and set the directory you want HBase to write to, hbase.rootdir. Index: src/packages/update-hbase-env.sh =================================================================== --- src/packages/update-hbase-env.sh (revision 1209240) +++ src/packages/update-hbase-env.sh (working copy) @@ -103,13 +103,13 @@ LOG_DIR=${LOG_DIR:-$PREFIX/var/log} PID_DIR=${PID_DIR:-$PREFIX/var/run} UNINSTALL=${UNINSTALL:-0} +ZOOKEEPER_HOME=${ZOOKEEPER_HOME:-/usr} if [ "${ARCH}" != "i386" ]; then LIB_DIR=${LIB_DIR}64 fi . /etc/default/hadoop-env.sh -. /etc/default/zookeeper-env.sh if [ "${UNINSTALL}" -eq "1" ]; then # Remove symlinks Index: src/packages/hbase-setup-conf.sh =================================================================== --- src/packages/hbase-setup-conf.sh (revision 0) +++ src/packages/hbase-setup-conf.sh (revision 0) @@ -0,0 +1,271 @@ +#!/usr/bin/env bash + +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +bin=`dirname "$0"` +bin=`cd "$bin"; pwd` + +#if [ -e "$bin"/../libexec/hbase-config.sh ]; then +# . "$bin"/../libexec/hbase-config.sh +#elif [ -e "$bin"/hbase-config.sh ]; then +# . "$bin"/hbase-config.sh +#else +# echo "hbase-config.sh is not found." 
+# exit 1 +#fi + +usage() { + echo " +usage: $0 + + Optional parameters: + --hadoop-conf=/etc/hadoop Set Hadoop configuration directory location + --hadoop-home=/usr Set Hadoop directory location + --hadoop-namenode=localhost Set Hadoop namenode hostname + --hadoop-replication=3 Set HDFS replication + --hbase-home=/usr Set HBase directory location + --hbase-conf=/etc/hbase Set HBase configuration directory location + --hbase-log=/var/log/hbase Set HBase log directory location + --hbase-pid=/var/run/hbase Set HBase pid directory location + --hbase-user=hbase Set HBase user + --java-home=/usr/java/default Set JAVA_HOME directory location + --kerberos-realm=KERBEROS.EXAMPLE.COM Set Kerberos realm + --keytab-dir=/etc/security/keytabs Set Keytab directory + --regionservers=localhost Set regionservers hostnames + --zookeeper-home=/usr Set ZooKeeper directory location + --zookeeper-quorum=localhost Set ZooKeeper Quorum + --zookeeper-snapshot=/var/lib/zookeeper Set ZooKeeper snapshot location + --dfs-client-read-shortcircuit=true/false Enable shortcircuit read for the client. Will default to false. 
+ --dfs-client-read-shortcircuit-skip-checksum=true/false Enable/disable the skipping of checksum check + " + exit 1 +} + +template_generator() { # $1: template file to read, $2: file the expanded lines are appended to + REGEX='(\$\{[a-zA-Z_][a-zA-Z_0-9]*\})' # matches one ${NAME} reference + # IFS=, read -r and the quoted printf keep every template line intact: no whitespace trimming, backslash eating or glob expansion. + while IFS= read -r line || [ -n "$line" ]; do + while [[ "$line" =~ $REGEX ]] ; do + LHS=${BASH_REMATCH[1]} + RHS="$(eval echo "\"$LHS\"")" # value the referenced variable currently holds + line=${line//$LHS/$RHS} + done + printf '%s\n' "$line" >> "$2" + done < "$1" +} + +OPTS=$(getopt \ + -n $0 \ + -o '' \ + -l 'hadoop-conf:' \ + -l 'hadoop-home:' \ + -l 'hadoop-namenode:' \ + -l 'hadoop-replication:' \ + -l 'hbase-conf:' \ + -l 'hbase-home:' \ + -l 'hbase-log:' \ + -l 'hbase-pid:' \ + -l 'hbase-user:' \ + -l 'java-home:' \ + -l 'keytab-dir:' \ + -l 'kerberos-realm:' \ + -l 'master-keytab:' \ + -l 'regionserver-keytab:' \ + -l 'regionservers:' \ + -l 'zookeeper-home:' \ + -l 'zookeeper-quorum:' \ + -l 'zookeeper-snapshot:' \ + -l 'dfs-client-read-shortcircuit:' \ + -l 'dfs-client-read-shortcircuit-skip-checksum:' \ + -o 'h' \ + -- "$@") + +if [ $? != 0 ] ; then + usage +fi + +eval set -- "${OPTS}" +while true ; do + case "$1" in + --hadoop-conf) + HADOOP_CONF_DIR=$2 + shift 2 + ;; + --hadoop-home) + HADOOP_HOME=$2 + shift 2 + ;; + --hadoop-namenode) + HADOOP_NAMENODE=$2 + shift 2 + ;; + --hadoop-replication) + HADOOP_REPLICATION=$2 + shift 2 + ;; + --hbase-conf) + HBASE_CONF_DIR=$2 + shift 2 + ;; + --hbase-home) + HBASE_HOME=$2 + shift 2 + ;; + --hbase-log) + HBASE_LOG_DIR=$2 + shift 2 + ;; + --hbase-pid) + HBASE_PID_DIR=$2 + shift 2 + ;; + --hbase-user) + HBASE_USER=$2 + shift 2 + ;; + --java-home) + JAVA_HOME=$2 + shift 2 + ;; + --kerberos-realm) + KERBEROS_REALM=$2 + shift 2 + ;; + --keytab-dir) + KEYTAB_DIR=$2; shift 2 + ;; + --regionservers) + REGION_SERVERS=$2 + shift 2 + ;; + --zookeeper-home) + ZOOKEEPER_HOME=$2 + shift 2 + ;; + --zookeeper-quorum) + ZOOKEEPER_QUORUM=$2 + shift 2 + ;; + --zookeeper-snapshot) + ZOOKEEPER_SNAPSHOT=$2 + shift 2 + ;; + --dfs-client-read-shortcircuit) + DFS_CLIENT_READ_SHORTCIRCUIT=$2 + shift 2 + ;; + 
--dfs-client-read-shortcircuit-skip-checksum) + DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=$2 + shift 2 + ;; + --) + shift ; break + ;; + *) + echo "Unknown option: $1" + usage + ;; + esac +done + +JAVA_HOME=${JAVA_HOME:-/usr/java/default} +HADOOP_HOME=${HADOOP_HOME:-/usr} +HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-/etc/hadoop} +HADOOP_NAMENODE=${HADOOP_NAMENODE:-localhost} +HADOOP_REPLICATION=${HADOOP_REPLICATION:-3} +HBASE_HOME=${HBASE_HOME:-/usr} +HBASE_CONF_DIR=${HBASE_CONF_DIR:-/etc/hbase} +HBASE_LOG_DIR=${HBASE_LOG_DIR:-$HBASE_HOME/var/log} +HBASE_PID_DIR=${HBASE_PID_DIR:-$HBASE_HOME/var/run} +HBASE_USER=${HBASE_USER:-hbase} +KEYTAB_DIR=${KEYTAB_DIR:-/etc/security/keytabs} +REGION_SERVERS=${REGION_SERVERS:-localhost} +REGION_SERVERS=`echo ${REGION_SERVERS} | sed -e 's/,/ /g'` +ZOOKEEPER_HOME=${ZOOKEEPER_HOME:-/usr} +ZOOKEEPER_QUORUM=${ZOOKEEPER_QUORUM:-localhost} +ZOOKEEPER_SNAPSHOT=${ZOOKEEPER_SNAPSHOT:-/var/lib/zookeeper} +DFS_CLIENT_READ_SHORTCIRCUIT=${DFS_CLIENT_READ_SHORTCIRCUIT:-false} +DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM=${DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM:-false} +if [ "${KERBEROS_REALM}" != "" ]; then + HBASE_M_K_PRINCIPAL="hm/_HOST@${KERBEROS_REALM}" + HBASE_R_K_PRINCIPAL="rs/_HOST@${KERBEROS_REALM}" + HBASE_M_K_FILE="${KEYTAB_DIR}/hm.service.keytab" + HBASE_R_K_FILE="${KEYTAB_DIR}/rs.service.keytab" + if [ -d ${KEYTAB_DIR} ]; then + chmod 700 ${KEYTAB_DIR}/[hr][ms].service.keytab + chown ${HBASE_USER} ${KEYTAB_DIR}/[hr][ms].service.keytab + fi +else + HBASE_M_K_PRINCIPAL="" + HBASE_R_K_PRINCIPAL="" + HBASE_M_K_FILE="" + HBASE_R_K_FILE="" +fi + +# Verify hadoop exist in HADOOP_HOME. +if [ ! 
-e ${HADOOP_HOME}/bin/hadoop ]; then + echo "Hadoop does not exist in ${HADOOP_HOME}" + exit 1 +fi + +rm -f ${HBASE_HOME}/lib/hadoop-core*.jar 2>/dev/null + +# Find hadoop jar files for Hadoop 0.20.205+. Both ls locations are grouped in { } so head -n1 trims whichever one matched to a single jar. +HADOOP_JAR=$( { ls ${HADOOP_HOME}/share/hadoop/hadoop-core*.jar 2>/dev/null || ls ${HADOOP_HOME}/hadoop-core*.jar 2>/dev/null; } | head -n1 ) +COMMONCONF_JAR=$( { ls ${HADOOP_HOME}/share/hadoop/lib/commons-configuration*.jar 2>/dev/null || ls ${HADOOP_HOME}/lib/commons-configuration*.jar 2>/dev/null; } | head -n1 ) +if [ "x${HADOOP_JAR}" != "x" ]; then + HBASE_CLASSPATH=${HADOOP_JAR}:${COMMONCONF_JAR} +else + # Find hadoop jar files for Hadoop 0.23+ + COMMON_JAR=$( { ls ${HADOOP_HOME}/share/hadoop/common/hadoop-common*.jar 2>/dev/null || ls ${HADOOP_HOME}/hadoop-common*.jar 2>/dev/null; } | head -n1 ) + HDFS_JAR=$( { ls ${HADOOP_HOME}/share/hadoop/hdfs/hadoop-hdfs*.jar 2>/dev/null || ls ${HADOOP_HOME}/hadoop-hdfs*.jar 2>/dev/null; } | head -n1 ) + MAPREDUCE_JAR=$( { ls ${HADOOP_HOME}/share/hadoop/mapreduce/hadoop-mapred*.jar 2>/dev/null || ls ${HADOOP_HOME}/hadoop-mapred*.jar 2>/dev/null; } | head -n1 ) + COMMONCONF_JAR=$( { ls ${HADOOP_HOME}/share/hadoop/common/lib/commons-configuration*.jar 2>/dev/null || ls ${HADOOP_HOME}/lib/commons-configuration*.jar 2>/dev/null; } | head -n1 ) + + if [ "x${COMMON_JAR}" != "x" ]; then + HBASE_CLASSPATH=${COMMON_JAR}:${HDFS_JAR}:${MAPREDUCE_JAR}:${COMMONCONF_JAR} + else + echo "Can not find Hadoop jar files." 
+ exit 1 + fi +fi + +if [ -e ${HBASE_CONF_DIR}/hbase-env.sh ]; then + rm -f ${HBASE_CONF_DIR}/hbase-env.sh +fi + +if [ -e ${HBASE_CONF_DIR}/hbase-site.xml ]; then + rm -f ${HBASE_CONF_DIR}/hbase-site.xml +fi + +template_generator ${HBASE_HOME}/share/hbase/templates/conf/hbase-env.sh ${HBASE_CONF_DIR}/hbase-env.sh +template_generator ${HBASE_HOME}/share/hbase/templates/conf/hbase-site.xml ${HBASE_CONF_DIR}/hbase-site.xml + +rm -f ${HBASE_CONF_DIR}/regionservers +for host in ${REGION_SERVERS} +do + echo $host >> ${HBASE_CONF_DIR}/regionservers +done + +mkdir -p ${HBASE_LOG_DIR} +mkdir -p ${HBASE_PID_DIR} +mkdir -p ${ZOOKEEPER_SNAPSHOT} +chown ${HBASE_USER} ${HBASE_LOG_DIR} +chown ${HBASE_USER} ${HBASE_PID_DIR} +chown ${HBASE_USER} ${ZOOKEEPER_SNAPSHOT} + +chmod 755 ${HBASE_CONF_DIR}/hbase-env.sh +echo "HBase Configuration setup is completed." Index: src/packages/templates/conf/hbase-env.sh =================================================================== --- src/packages/templates/conf/hbase-env.sh (revision 0) +++ src/packages/templates/conf/hbase-env.sh (revision 0) @@ -0,0 +1,73 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Set environment variables here. + +# The java implementation to use. Java 1.6 required. 
+export JAVA_HOME=${JAVA_HOME} + +# Extra Java CLASSPATH elements. Optional. +export HBASE_CLASSPATH=${HADOOP_CONF_DIR}:${HBASE_CLASSPATH} + +# The maximum amount of heap to use, in MB. Default is 1000. +# export HBASE_HEAPSIZE=1000 + +# Extra Java runtime options. +# Below are what we set by default. May only work with SUN JVM. +# For more on why as well as other possible settings, +# see http://wiki.apache.org/hadoop/PerformanceTuning +export HBASE_OPTS="-ea -XX:+UseConcMarkSweepGC -XX:+CMSIncrementalMode" + +# Uncomment below to enable java garbage collection logging. +# export HBASE_OPTS="$HBASE_OPTS -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -Xloggc:$HBASE_HOME/logs/gc-hbase.log" + +# Uncomment and adjust to enable JMX exporting +# See jmxremote.password and jmxremote.access in $JRE_HOME/lib/management to configure remote password access. +# More details at: http://java.sun.com/javase/6/docs/technotes/guides/management/agent.html +# +# export HBASE_JMX_BASE="-Dcom.sun.management.jmxremote.ssl=false -Dcom.sun.management.jmxremote.authenticate=false" +# export HBASE_MASTER_OPTS="$HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10101 -javaagent:lib/HelloWorldAgent.jar" +# export HBASE_REGIONSERVER_OPTS="$HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10102" +# export HBASE_THRIFT_OPTS="$HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10103" +# export HBASE_ZOOKEEPER_OPTS="$HBASE_JMX_BASE -Dcom.sun.management.jmxremote.port=10104" + +# File naming hosts on which HRegionServers will run. $HBASE_HOME/conf/regionservers by default; set here to the file hbase-setup-conf.sh generates in the configuration directory. +export HBASE_REGIONSERVERS=${HBASE_CONF_DIR}/regionservers + +# Extra ssh options. Empty by default. +# export HBASE_SSH_OPTS="-o ConnectTimeout=1 -o SendEnv=HBASE_CONF_DIR" + +# Where log files are stored. $HBASE_HOME/logs by default. +export HBASE_LOG_DIR=${HBASE_LOG_DIR} + +# A string representing this instance of hbase. $USER by default. 
+# export HBASE_IDENT_STRING=$USER + +# The scheduling priority for daemon processes. See 'man nice'. +# export HBASE_NICENESS=10 + +# The directory where pid files are stored. /tmp by default. +export HBASE_PID_DIR=${HBASE_PID_DIR} + +# Seconds to sleep between slave commands. Unset by default. This +# can be useful in large clusters, where, e.g., slave rsyncs can +# otherwise arrive faster than the master can service them. +# export HBASE_SLAVE_SLEEP=0.1 + +# Tell HBase whether it should manage its own instance of Zookeeper or not. +export HBASE_MANAGES_ZK=true Index: src/packages/templates/conf/hbase-site.xml =================================================================== --- src/packages/templates/conf/hbase-site.xml (revision 0) +++ src/packages/templates/conf/hbase-site.xml (revision 0) @@ -0,0 +1,101 @@ + + + + + + hbase.rootdir + hdfs://${HADOOP_NAMENODE}:8020/hbase + The directory shared by RegionServers. + + + + dfs.replication + ${HADOOP_REPLICATION} + The replication count for HLog and HFile storage. Should not be greater than HDFS datanode count. + + + + hbase.cluster.distributed + true + The mode the cluster will be in. Possible values are + false: standalone and pseudo-distributed setups with managed Zookeeper + true: fully-distributed with unmanaged Zookeeper Quorum (see hbase-env.sh) + + + + hbase.zookeeper.property.clientPort + 2181 + Property from ZooKeeper's config zoo.cfg. + The port at which the clients will connect. + + + + hbase.zookeeper.quorum + ${ZOOKEEPER_QUORUM} + Comma separated list of servers in the ZooKeeper Quorum. + For example, "host1.mydomain.com,host2.mydomain.com,host3.mydomain.com". + By default this is set to localhost for local and pseudo-distributed modes + of operation. For a fully-distributed setup, this should be set to a full + list of ZooKeeper quorum servers. If HBASE_MANAGES_ZK is set in hbase-env.sh + this is the list of servers which we will start/stop ZooKeeper on. 
+ + + + hbase.zookeeper.property.dataDir + ${ZOOKEEPER_SNAPSHOT} + Property from ZooKeeper's config zoo.cfg. + The directory where the snapshot is stored. + + + + hbase.master.keytab.file + ${HBASE_M_K_FILE} + + + + hbase.master.kerberos.principal + ${HBASE_M_K_PRINCIPAL} + + + + hbase.regionserver.keytab.file + ${HBASE_R_K_FILE} + + + + hbase.regionserver.kerberos.principal + ${HBASE_R_K_PRINCIPAL} + + + + dfs.client.read.shortcircuit + ${DFS_CLIENT_READ_SHORTCIRCUIT} + Enable/Disable short circuit read for your client. + Hadoop servers should be configured to allow short circuit read + for the hbase user for this to take effect + + + + dfs.client.read.shortcircuit.skip.checksum + ${DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM} + Enable/disable skipping the checksum check + +