Index: bin/benchmark
===================================================================
--- bin/benchmark	(revision 0)
+++ bin/benchmark	(revision 0)
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Usage: benchmark <cluster-name> <number-of-workers> (both required; complaints go to stderr, exit status 1).
+if [ -z "$1" ]; then
+  echo "Cluster name required!" >&2
+  exit 1
+fi
+if [ -z "$2" ]; then
+  echo "Must specify the number of workers to start." >&2
+  exit 1
+fi
+
+CLUSTER=$1
+NO_INSTANCES=$2
+
+# Absolute path of the directory holding this script; give up if it cannot be resolved.
+bin=$(cd "$(dirname "$0")" && pwd) || exit 1
+. "$bin"/hadoop-ec2-env.sh
+
+"$bin"/hadoop-ec2 launch-cluster "$CLUSTER" "$NO_INSTANCES"
+
+WAIT_FOR=$((NO_INSTANCES - NO_INSTANCES/10)) # all but a tenth (integer division) of the requested instances
+echo "Waiting for $WAIT_FOR (of $NO_INSTANCES) worker instances to start"
+# awk keeps the reservations whose 4th field is $CLUSTER or $CLUSTER_MASTER; the names are passed with -v and forced to strings so they are compared literally, never spliced into the program.
+# NOTE(review): "running" lines in the $CLUSTER_MASTER reservation are counted as well - confirm that is intended.
+while true; do
+  printf "."
+  RUNNING=$(ec2-describe-instances | awk -v c="$CLUSTER" -v m="$CLUSTER_MASTER" 'BEGIN { c = c ""; m = m "" } $1 == "RESERVATION" && ($4 == c || $4 == m), $1 == "RESERVATION" && $4 != c && $4 != m' | grep -c running)
+  if [ "$RUNNING" -ge "$WAIT_FOR" ]; then break; fi
+  sleep 1
+done
+echo "$RUNNING worker instances are running"
+
+echo "Waiting for cluster to start"
+sleep 60 # should wait until namenode + 90% of datanodes, jobtracker and 90% of tasktrackers are available
+# Warm-up: hadoop-ec2 is handed a command string plus the cluster name; the globs and redirections live inside that string, so this shell does not apply them.
+echo "Running a small sort job to warm up cluster"
+timestamp=$(date "+%s")
+"$bin"/hadoop-ec2 "/usr/local/hadoop-*/bin/hadoop jar /usr/local/hadoop-*/hadoop-*-examples.jar randomwriter -D test.randomwriter.maps_per_host=1 -D test.randomwrite.bytes_per_map=1024 random-small-$timestamp > random-small.out 2>&1" "$CLUSTER"
+"$bin"/hadoop-ec2 "/usr/local/hadoop-*/bin/hadoop jar /usr/local/hadoop-*/hadoop-*-examples.jar sort random-small-$timestamp sort-small-$timestamp > sort-small.out 2>&1" "$CLUSTER"
+
+echo "Running main sort job"
+timestamp=$(date "+%s") # epoch seconds, used as the suffix of this run's input and output names
+"$bin"/hadoop-ec2 "/usr/local/hadoop-*/bin/hadoop jar /usr/local/hadoop-*/hadoop-*-examples.jar randomwriter -D test.randomwriter.maps_per_host=4 -D test.randomwrite.bytes_per_map=$((1024*1024*1024)) random-$timestamp > random.out 2>&1" "$CLUSTER" # $((...)) is evaluated by this shell before the string is passed on
+"$bin"/hadoop-ec2 "/usr/local/hadoop-*/bin/hadoop jar /usr/local/hadoop-*/hadoop-*-examples.jar sort random-$timestamp sort-$timestamp > sort.out 2>&1" "$CLUSTER"
+
+HOST=$(cat "$MASTER_IP_PATH") # presumably the master's address, written to this file elsewhere - TODO confirm
+scp $SSH_OPTS "root@$HOST:sort.out" "$bin"/sort.out # SSH_OPTS left unquoted, presumably so it can expand to several options
+sort_duration=$(grep "The job took" "$bin"/sort.out | cut -f 4 -d' ') # 4th space-separated field of the matching line
+rm -f "$bin"/sort.out
+echo "$sort_duration"
+# Pipe an endless stream of "yes" into the teardown, presumably to answer its confirmation prompts unattended.
+yes yes | "$bin"/hadoop-ec2 terminate-cluster "$CLUSTER"

Property changes on: bin/benchmark
___________________________________________________________________
Name: svn:executable
   + *

