#!/bin/bash
#####
# esg-compute-tools: ESGF Node Application Stack - Compute Tools
# description: Compute tools installer for the ESGF Node application stack
#
#****************************************************************************
#* *
#* Organization: Lawrence Livermore National Lab (LLNL) *
#* Directorate: Computation *
#* Department: Computing Applications and Research *
#* Division: S&T Global Security *
#* Matrix: Atmospheric, Earth and Energy Division *
#* Program: PCMDI *
#* Project: Earth Systems Grid Fed (ESGF) Node Software Stack *
#* First Author: Eugenia Gabrielova (gabrielov1@llnl.gov) *
#* *
#****************************************************************************
#* *
#* Copyright (c) 2009, Lawrence Livermore National Security, LLC. *
#* Produced at the Lawrence Livermore National Laboratory *
#* Written by: Gavin M. Bell (gavin@llnl.gov), *
#* Eugenia Gabrielova (gabrielov1@llnl.gov) *
#* LLNL-CODE-420962 *
#* *
#* All rights reserved. This file is part of the: *
#* Earth System Grid Fed (ESGF) Node Software Stack, Version 1.0 *
#* *
#* For details, see http://esgf.org/ *
#* Please also read this link *
#* http://esgf.org/LICENSE *
#* *
#* * Redistribution and use in source and binary forms, with or *
#* without modification, are permitted provided that the following *
#* conditions are met: *
#* *
#* * Redistributions of source code must retain the above copyright *
#* notice, this list of conditions and the disclaimer below. *
#* *
#* * Redistributions in binary form must reproduce the above copyright *
#* notice, this list of conditions and the disclaimer (as noted below) *
#* in the documentation and/or other materials provided with the *
#* distribution. *
#* *
#* Neither the name of the LLNS/LLNL nor the names of its contributors *
#* may be used to endorse or promote products derived from this *
#* software without specific prior written permission. *
#* *
#* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS *
#* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT *
#* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS *
#* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE *
#* LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR *
#* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, *
#* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT *
#* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF *
#* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND *
#* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, *
#* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT *
#* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF *
#* SUCH DAMAGE. *
#* *
#****************************************************************************
#####
#####
# Description: Installer for ESG Compute Tools
# Implemented Tools: Spark, Hadoop, Zookeeper
# Tools in Progress:
# * Yarn (Hadoop's next-generation resource manager)
# * Mesos (Cluster Management)
# * Cascalog (Clojure-based query language for Hadoop)
# * Scoobi (Scala framework for Hadoop)
# Authors: Eugenia Gabrielova {gabrielov1@llnl.gov, genia.likes.science@gmail.com}
####
#####
# uses: git, tar, wget
#####
#--------------
# User Defined / Settable (public)
#--------------
install_prefix=${install_prefix:-"/usr/local"}
esg_root_dir=${esg_root_dir:-${ESGF_HOME:-"/esg"}}
DEBUG=${DEBUG:-0}
git_placeholder="$install_prefix/git"
git_exec_path_param="--exec-path=$git_placeholder/libexec/git-core"
java_placeholder="$install_prefix/java"
java_install_path_config_param="--with-java-home=$java_placeholder/bin/java"
compress_extensions=".tar.gz|.tar.bz2|.tgz|.bz2|.tar"
envfile="/etc/esg.env"
esg_functions_file=./esg-functions
esg_compute_languages_file=./esg-compute-languages
install_manifest=${install_manifest:-"${esg_root_dir}/esgf-install-manifest"}
# TODO: create a directory named esg_config_compute under esg_config_dir (one hedged sketch follows)
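# [Sketch of the TODO above] assumes esg_config_dir follows the usual
# ${esg_root_dir}/config convention -- that layout is not confirmed anywhere
# in this script. Defined but deliberately never called yet.
esg_config_dir=${esg_config_dir:-"${esg_root_dir}/config"}
ensure_compute_config_dir() {
    mkdir -p "${esg_config_dir}/esg_config_compute"
}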
#--------------------------------
# External programs' versions
#--------------------------------
zookeeper_version=${zookeeper_version:="3.3.5"}
zookeeper_min_version=${zookeeper_min_version:="3.3.3"}
zookeeper_max_version=${zookeeper_max_version:="3.4.3"}
mesos_version=${mesos_version:="0.9.0"}
mesos_min_version=${mesos_min_version:="0.9.0"}
spark_version=${spark_version:="0.5.0"}
spark_min_version=${spark_min_version:="0.5.0"}
hadoop_version=${hadoop_version:="1.0.3"}
hadoop_min_version=${hadoop_min_version:="1.0.1"}
hadoop_max_version=${hadoop_max_version:="1.0.4"}
#--------------------------------
# External programs' script variables
#--------------------------------
mesos_install_dir=${MESOS_HOME:-${install_prefix}/mesos}
mesos_git_url="git://git.apache.org/mesos.git"
mesos_build_dir=${MESOS_BUILD_HOME:-${install_prefix}/mesos} # TODO /mesos/build...
spark_install_dir=${SPARK_HOME:-${install_prefix}/spark}
spark_git_url="git://github.com/mesos/spark.git"
zookeeper_install_dir=${ZOOKEEPER_HOME:-${install_prefix}/zookeeper}
zookeeper_dist_url=http://www.gtlib.gatech.edu/pub/apache/zookeeper/zookeeper-${zookeeper_version}/zookeeper-${zookeeper_version}.tar.gz
hadoop_install_dir=${HADOOP_HOME:-${install_prefix}/hadoop}
hadoop_dist_url=http://www.gtlib.gatech.edu/pub/apache/hadoop/common/hadoop-${hadoop_version}/hadoop-${hadoop_version}.tar.gz
hadoop_work_dir=${hadoop_work_dir:-${install_prefix}/hadoop_working}
#-------------------------------
# External programs' configuration variables
#-------------------------------
zookeeper_client_port=2181
zookeeper_workdir=${zookeeper_install_dir}/zookeeper_working
zookeeper_confdir=${zookeeper_install_dir}/conf
#-------------------------------
# Internal script variables
#-------------------------------
date_format="+%Y_%m_%d_%H%M%S"
[ -e "${envfile}" ] && source ${envfile} && ((VERBOSE)) && printf "sourcing environment from: ${envfile} \n"
[ -e ${esg_functions_file} ] && source ${esg_functions_file} && ((VERBOSE)) && printf "sourcing from: ${esg_functions_file} \n"
#####
# Mesos (Cluster manager for resource sharing across distributed applications)
#####
setup_mesos() {
# TODO Build mesos in build directory on install tree
# Checking Mesos Version
echo
echo -n "Checking for Mesos >= ${mesos_min_version}"
if [ -e ${mesos_install_dir} ]; then
# Unconfigured Mesos will not be able to check its version
if [ ! -e ${mesos_install_dir}/configure ]; then
(cd ${mesos_install_dir} && ./bootstrap)
fi
local mesos_current_version=$(${mesos_install_dir}/configure --version | head -1 | awk '{print $3}')
check_version_helper $mesos_current_version ${mesos_min_version}
[ $? == 0 ] && (( ! force_install )) && echo " [OK]" && return 0
else
echo
echo "No Mesos installation detected"
fi
echo
echo "**********************************"
echo "Setting Up Mesos ${mesos_version}"
echo "**********************************"
echo
# Retrieve Mesos Source and build after configuration for ESGF tools
# Uses: Hadoop, Zookeeper, Java, Python
git ${git_exec_path_param} clone ${mesos_git_url} ${mesos_install_dir}
(cd ${mesos_install_dir} && ./bootstrap && ./configure --with-java=${java_placeholder} --disable-python --disable-webui && make)
# TODO let's make this config tastier...
# TODO make check hangs on zookeeper install
# Boilerplate: [ $? != 0 ] && echo " ERROR: Could not clone Mesos: ${mesos_git_url}" && popd && checked_done 1
# TODO Mesos /usr/local configuration
# TODO ./configure with Hadoop, Zookeeper, Java, Python from /usr/local
# Add Mesos home to environment and install manifest
write_env_mesos
write_mesos_install_log_entry
}
write_env_mesos() {
echo "export MESOS_HOME=${mesos_install_dir}" >> ${envfile}
dedup ${envfile} && source ${envfile}
return 0
}
write_mesos_install_log_entry() {
local entry="$(date ${date_format}) esg-compute-tools:mesos=${mesos_version} ${mesos_install_dir}"
echo ${entry} >> ${install_manifest}
dedup ${install_manifest}
return 0
}
config_mesos() {
# The most efficient way to start and stop mesos is with built-in deploy scripts, which
# require some configuration.
# [TODO]
# Mesos and Hadoop config
#
# [TODO]
# List of Mesos hosts and slaves
#
# [TODO]
# LIBPROCESS_IP in MESOS_HOME/deploy/mesos-env.sh
#
# [TODO] Zookeeper URL Configuration for Mesos master running
# Required for fault tolerance mode
# First we check for Hadoop configuration - if it exists, request reconfigure, if not,
# proceed automatically.
# TODO Check existence of Hadoop relevant files
# The following is a bit of a hack, but in polite terms, a "work in progress".
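# [Sketch] One hedged take on the LIBPROCESS_IP TODO above; assumes
# get_config_ip (from esg-functions, also used below) returns this node's
# address and that the deploy scripts source MESOS_HOME/deploy/mesos-env.sh.
# Left commented out until the deploy-script configuration lands:
# echo "export LIBPROCESS_IP=$(get_config_ip eth0)" >> ${mesos_build_dir}/deploy/mesos-env.sh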
echo
doconfig="N"
if [ -e ${mesos_install_dir}/hadoop ]; then
read -e -p "Configure Mesos-Compatible Hadoop Installation? [y/N]: " doconfig
# Configure Hadoop environment for Mesos
if [ "$doconfig" = "Y" ] || [ "$doconfig" = "y" ]; then
echo "Configuring Hadoop for Mesos..."
#
# Inform user of skipped configuration step
else
echo "Skipping Mesos Hadoop Configuration..."
fi
fi
}
test_mesos() {
# Currently running Mesos + Zookeeper with custom deploy methods, tested here.
# Test 1: Mesos + Spark
# Test 2: Mesos + Hadoop
# Test 3: Mesos + Hadoop + Spark
echo
echo "Test 1: Mesos with Spark Framework, [TODO Zookeeper Quorum Management]"
run_mesos_stop_cluster
stop_mesos_zookeeper
start_mesos_zookeeper
# TODO Yuck hard-coding
#local zkflag_spark=zk://localhost:2181,localhost:2182,localhost:2183,localhost:2184,localhost:2185/znode
#local sparkResult=$(${spark_install_dir}/run spark.examples.SparkPi $zkflag_spark) # >& /dev/null && echo $?)
#echo -n "Tested Mesos Local Cluster with Spark... "
#[ $sparkResult == 0 ] && echo "[ PASSED ]" || echo "[ FAILED ]"
run_mesos_stop_cluster
stop_mesos_zookeeper
echo
echo "Test 2: Mesos with Hadoop Framework, [TODO Zookeeper Quorum Management]"
# Starts Zookeeper, starts Mesos, runs Hadoop pseudo-distributed test job on Mesos
# TODO Starts Zookeeper
echo -n "Tested Mesos Local Cluster with Legacy Hadoop... "
echo "[ TBD ]"
echo
echo "Test 3: Mesos with Hadoop, Spark, and [TODO Zookeeper Quorum Management]"
# Starts Zookeeper, starts Mesos, runs Spark job and Hadoop job on mesos
echo -n "Tested Mesos Local Cluster with Hadoop, Spark... "
echo "[ TBD ]"
}
start_mesos_zookeeper() {
# Start Zookeeper Locally replicated server
local quorum=5
for (( n=1; n <= $quorum; n++ ))
do
start_zookeeper_server ${zookeeper_confdir}/zoo_local_replicated_$n.cfg
done
# Start Mesos Master on locally replicated server+port
local zkport=2181
local mesosport=5050
for (( p=1; p <= $quorum; p++ ))
do
# TODO This should be written to a file in Zookeeper's configuration...
local flag="--zk=zk://localhost:2181,localhost:2182,localhost:2183,localhost:2184,localhost:2185/znode --port=$((mesosport++))"
run_mesos_start_master $flag
done
# Start Zookeeper Slaves
}
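# [Sketch] Hedged helper for the hard-coded zk:// strings above: derive the
# connection URL from the quorum size and the base client port laid out by
# config_zookeeper below. Illustration only; the callers above still use the
# literal string, and the /znode chroot is an assumption, not configured here.
mesos_zk_url() {
    local quorum=${1:-5}
    local port=${zookeeper_client_port}
    local hosts=""
    local n
    for (( n=1; n <= quorum; n++ )); do
        hosts+="localhost:$((port++)),"
    done
    echo "zk://${hosts%,}/znode"
}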
stop_mesos_zookeeper() {
local quorum=5
for (( n=1; n <= $quorum; n++ ))
do
stop_zookeeper_server ${zookeeper_confdir}/zoo_local_replicated_$n.cfg
done
stop_zookeeper_server ${zookeeper_confdir}/zoo_standalone.cfg
}
run_mesos_start_master() {
# Takes as input any flags (besides quiet) to pass to the master
# TODO Start with Zookeeper URL, ESGF IP address
echo "Starting Mesos Master"
${mesos_build_dir}/bin/mesos-master.sh --quiet "$@" &
# Backgrounding always "succeeds"; check the process is actually alive instead
sleep 1 && kill -0 $! 2>/dev/null && echo "[ Mesos Master now running ]" || echo "[ Mesos Master failed to run ]"
}
run_mesos_start_slave() {
# TODO Run with ESGF Ip address for mesos master url
# TODO figure out correct memory resources
echo "Starting Mesos Slave"
# Use the provided master URL, falling back to this node's address
local mesos_master_url=${1:-$(get_config_ip eth0)}
${mesos_build_dir}/bin/mesos-slave.sh "--master=${mesos_master_url}" "--resources=cpus:2;mem:1024" &
}
run_mesos_stop_cluster() {
# Stop Master without parent shell termination notification
master_pid=$(pgrep mesos-master)
if [ $? == 0 ]; then
echo -n "Stopping Mesos Master ..."
kill $master_pid
wait $master_pid 2>/dev/null
echo "STOPPED"
else
echo "Mesos master not currently running"
fi
# Stop Slaves
slave_pid=$(pgrep mesos-slave)
if [ $? == 0 ]; then
echo -n "Stopping Mesos Slaves ..."
kill $slave_pid
wait $slave_pid 2>/dev/null
echo "STOPPED"
else
echo "Mesos slaves not currently running"
fi
}
clean_mesos() {
doit="N"
if [ -e ${mesos_install_dir} ]; then
read -e -p "remove cluster management framework Mesos? (${mesos_install_dir}) [y/N]: " doit
if [ "doit" = "Y" ] || [ "$doit" = "y" ]; then
echo "removing ${mesos_install_dir}"
rm -rf ${mesos_install_dir}
[ $? != 0 ] && echo "ERROR: Unable to remove ${mesos_install_dir}"
remove_env MESOS_HOME
remove_install_log_entry mesos
fi
fi
}
#####
# Spark (MapReduce-style framework in Scala, targeted at iterative applications
# that reuse working sets of data)
#####
setup_spark() {
# Checking for Spark Version
echo
echo -n "Checking for Spark >= ${spark_min_version}"
if [ -e ${spark_install_dir} ]; then
local spark_current_version=$(cd ${spark_install_dir} && sbt/sbt version | awk 'NR==5{split($3,array,"-")} END{print array[1]}' )
check_version_helper $spark_current_version ${spark_min_version}
[ $? == 0 ] && (( ! force_install )) && echo " [OK]" && return
else
echo
echo "No Spark installation detected"
fi
echo
echo "*****************************"
echo "Setting Up Spark ${spark_version}"
echo "*****************************"
echo
# Retrieve and build Spark
git ${git_exec_path_param} clone ${spark_git_url} ${spark_install_dir}
echo "source ${envfile}" >> ${spark_install_dir}/conf/spark-env.sh
# TODO Spark Java environment configuration
(cd ${spark_install_dir} && sbt/sbt compile)
# TODO check for download errors or git failures
# Add Spark home to environment and install manifest
write_env_spark
write_spark_install_log_entry
}
write_env_spark() {
echo "export SPARK_HOME=${spark_install_dir}" >> ${envfile}
dedup ${envfile} && source ${envfile}
return 0
}
write_spark_install_log_entry() {
local entry="$(date ${date_format}) esg-compute-tools:spark=${spark_version} ${spark_install_dir}"
echo ${entry} >> ${install_manifest}
dedup ${install_manifest}
return 0
}
config_spark() {
echo
doconfig="N"
if [ -e ${spark_install_dir} ]; then
read -e -p "Configure Spark Installation? [y/N]: " doconfig
# Configure Spark environment, deployment, local/Mesos
if [ "$doconfig" = "Y" ] || [ "$doconfig" = "y" ]; then
echo "Configuring Spark..."
# TODO Configure Spark here
# Inform user of skipped configuration step
else
echo "Skipping Spark Configuration..."
fi
fi
}
test_spark() {
# Test 1: Running a "local" version of Spark with 2 cores
echo
echo -n "Testing Spark - Local... "
local sparkResult=$(cd ${spark_install_dir} && ./run spark.examples.SparkLR local[2] >& /dev/null && echo $?)
[ "$sparkResult" == "0" ] && echo "[ PASSED ]" || echo "[ FAILED ]"
}
clean_spark() {
doit="N"
if [ -e ${spark_install_dir} ]; then
read -e -p "remove iterative computation framework Spark? (${spark_install_dir}) [y/N]: " doit
if [ "doit" = "Y" ] || [ "$doit" = "y" ]; then
echo "removing ${spark_install_dir}"
rm -rf ${spark_install_dir}
[ $? != 0 ] && echo "ERROR: Unable to remove ${spark_install_dir}"
remove_env SPARK_HOME
remove_install_log_entry spark
fi
fi
}
#####
# Hadoop (MapReduce distributed computing on HDFS)
#####
setup_hadoop() {
echo
# Check Hadoop Version
echo -n "Checking for Hadoop >= ${hadoop_min_version}"
if [ -e ${hadoop_install_dir} ]; then
local hadoop_current_version=$(export HADOOP_HOME_WARN_SUPPRESS="TRUE" && \
${hadoop_install_dir}/bin/hadoop version | head -1 | awk '{print $2}')
local hadoop_version_number=${hadoop_current_version%-*}
check_version_helper $hadoop_version_number ${hadoop_min_version} ${hadoop_max_version}
[ $? == 0 ] && (( ! force_install )) && echo " [OK]" && return 0
else
echo
echo "No Hadoop installation detected"
fi
echo
echo "*****************************"
echo "Setting up Hadoop ${hadoop_version}"
echo "*****************************"
echo
# Retrieve Hadoop Distribution File
local hadoop_dist_file=${hadoop_dist_url##*/}
local hadoop_dist_dir=$(echo ${hadoop_dist_file} | awk 'gsub(/('$compress_extensions')/,"")')
# Check for empty distribution file (size 0)
# TODO
if [ ! -e ${install_prefix}/${hadoop_dist_dir} ]; then
echo "Don't see Hadoop distribution directory ${hadoop_dist_dir}"
wget -O "${install_prefix}/${hadoop_dist_file}" ${hadoop_dist_url}
[ $? != 0 ] && echo " ERROR: Could not download Hadoop: ${hadoop_dist_file}" && popd && checked_done 1
echo "Unpacking ${hadoop_dist_file}..."
tar xzf ${install_prefix}/${hadoop_dist_file} -C ${install_prefix}
[ $? != 0 ] && echo " ERROR: Could not extract Hadoop: ${hadoop_dist_file}" && popd && checked_done 1
mv "${install_prefix}/${hadoop_dist_dir}" ${hadoop_install_dir}
fi
# Add Hadoop home to environment and install manifest
write_env_hadoop
write_hadoop_install_log_entry
# Remove Hadoop Distribution File
if [ -e "${install_prefix}/${hadoop_dist_file}" ]; then
rm "${install_prefix}/${hadoop_dist_file}"
fi
# Create working directory
if [ ! -e ${hadoop_work_dir} ]; then
mkdir ${hadoop_work_dir}
fi
}
write_env_hadoop() {
echo "export HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"" >> ${envfile}
echo "export HADOOP_HOME=${hadoop_install_dir}" >> ${envfile}
echo "export JAVA_HOME=${install_prefix}/java" >> ${hadoop_install_dir}/conf/hadoop-env.sh
dedup ${envfile} && source ${envfile}
return 0
}
write_hadoop_install_log_entry() {
local entry="$(date ${date_format}) esg-compute-tools:hadoop=${hadoop_version} ${hadoop_install_dir}"
echo ${entry} >> ${install_manifest}
dedup ${install_manifest}
return 0
}
config_hadoop() {
# This configuration method should prompt the user to enter settings pertaining to the type
# of Hadoop distribution (local, semi-distributed, cluster distributed).
# There is some info on getting Hadoop setup here: http://hadoop.apache.org/common/#Getting+Started
# TODO Fully distributed operation requires a pretty hefty amount of configuration. Using the defaults
# Hadoop provides is probably best. This configuration guide will be used for fully distributed configuration:
# http://hadoop.apache.org/common/docs/r0.20.2/cluster_setup.html
# Initialize configuration
echo
local doconfig="N"
if [ -e ${hadoop_install_dir} ]; then
read -e -p "Configure Hadoop Installation? [y/N]: " doconfig
# Configure Hadoop modes, HDFS, configuration
if [ "$doconfig" = "Y" ] || [ "$doconfig" = "y" ]; then
echo "Configuring Hadoop..."
# This configures both stand-alone and pseudo-distributed configuration files
pseudo_distributed_config_setup
echo "Configure default Hadoop distribution mode for this node:"
# Stand-alone operation doesn't really require configuration,
# as Hadoop just runs as a .jar file. Some examples may require an
# input and output directory, but it is probably overkill to configure
# that here. Configuring Hadoop to run local should be enough.
echo "[1] Stand-alone operation (good for debugging)"
echo "[2] Pseudo-distributed operation on local node"
read -e -p "(Default: Stand-Alone Operation [2]): " distribution_mode
# Pseudo-Distributed Local Operation
if [ "$distribution_mode" = "2" ]; then
# To learn more about Pseudo-Distributed operation or to fine-tune your configuration,
# check out the Hadoop documentation here:
# http://hadoop.apache.org/common/docs/r0.20.2/quickstart.html#PseudoDistributed
# TODO: Write configuration to hadoop-config in esg-env here...
echo "Hadoop: Psuedo-Distributed operation is now default for this node."
# Standalone Local Operation
else
# TODO: Set standalone as default mode in ESG configuration for hadoop
echo "Hadoop: Stand-Alone Hadoop Operation is now default for this node."
fi
# Inform user of skipped configuration step
else
echo "Skipping Hadoop Configuration..."
fi
fi
}
start_hadoop_local_distributed() {
# TODO Start Hadoop with configuration files from ESG configuration directory
echo "Hadoop: Starting hadoop in local distributed mode..."
${hadoop_install_dir}/bin/start-all.sh --config ${hadoop_install_dir}/conf/local_distributed >& /dev/null
}
stop_hadoop_local_distributed() {
echo "Hadoop: Shutting down namenode and jobtracker..."
${hadoop_install_dir}/bin/stop-all.sh >& /dev/null
}
pseudo_distributed_config_setup() {
# TODO Best practice would be to not keep these guys in Hadoop's install tree...
# Create directories for standalone, local_distributed configuration
# For now we destroy them first.
rm -rf ${hadoop_install_dir}/conf/standalone
rm -rf ${hadoop_install_dir}/conf/local_distributed
rm -rf ${hadoop_install_dir}/conf/managed_cluster
mkdir ${hadoop_install_dir}/conf/standalone
mkdir ${hadoop_install_dir}/conf/local_distributed
mkdir ${hadoop_install_dir}/conf/managed_cluster
# Copy default example config files to new configuration directories
cp ${hadoop_install_dir}/conf/*.xml ${hadoop_install_dir}/conf/standalone/
cp ${hadoop_install_dir}/conf/hadoop-env.sh ${hadoop_install_dir}/conf/standalone/hadoop-env.sh
cp ${hadoop_install_dir}/conf/slaves ${hadoop_install_dir}/conf/standalone/slaves
cp ${hadoop_install_dir}/conf/hadoop-env.sh ${hadoop_install_dir}/conf/local_distributed/hadoop-env.sh
cp ${hadoop_install_dir}/conf/slaves ${hadoop_install_dir}/conf/local_distributed/slaves
cp ${hadoop_install_dir}/conf/hadoop-env.sh ${hadoop_install_dir}/conf/managed_cluster/hadoop-env.sh
cp ${hadoop_install_dir}/conf/*.xml ${hadoop_install_dir}/conf/managed_cluster/
cp ${hadoop_install_dir}/conf/slaves ${hadoop_install_dir}/conf/managed_cluster/slaves
# Create configuration files for pseudo-distributed mode
echo '<?xml version="1.0"?>
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>' >> ${hadoop_install_dir}/conf/local_distributed/core-site.xml
echo '<?xml version="1.0"?>
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>' >> ${hadoop_install_dir}/conf/local_distributed/hdfs-site.xml
echo '<?xml version="1.0"?>
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
</configuration>' >> ${hadoop_install_dir}/conf/local_distributed/mapred-site.xml
}
test_hadoop() {
echo
# Hadoop Test - Local (Standalone) Mode
test_hadoop_standalone
# Hadoop Test 2 - Local Pseudo-Distributed
test_hadoop_pseudo_distributed
}
test_hadoop_standalone() {
# Hadoop Test - Local (Standalone) Mode, with wordcount example
echo -n "Testing Hadoop - Local (Standalone) Mode... "
mkdir ${hadoop_install_dir}/sandbox
pushd ${hadoop_install_dir} >& /dev/null
mkdir sandbox/input
cp conf/*.xml sandbox/input
bin/hadoop jar hadoop-examples-*.jar grep sandbox/input sandbox/output 'dfs[a-z.]+' >& /dev/null
[ -e sandbox/output/_SUCCESS ] && echo "[ PASSED ]" || echo "[ FAILED ]"
popd >& /dev/null
rm -rf ${hadoop_install_dir}/sandbox
}
hadoop_temp_local_ssh () {
# Setup a local ssh: this functionality is duplicated elsewhere in esg-node
# installer but is needed temporarily for hadoop pseudo-distribution
if [ ! -e ~/.ssh/id_dsa ]; then
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
# sshd refuses keys in a group/world-writable authorized_keys file
chmod 0600 ~/.ssh/authorized_keys
fi
}
test_hadoop_pseudo_distributed() {
# Generate temporary ssh keys
hadoop_temp_local_ssh
echo "Begin Testing Hadoop - Pseudo-Distributed Mode... "
if [ -e ${hadoop_install_dir}/conf/local_distributed ]; then
#${hadoop_install_dir}/bin/hadoop namenode -format
start_hadoop_local_distributed
${hadoop_install_dir}/bin/hadoop fs -mkdir ${hadoop_work_dir}/input >& /dev/null
${hadoop_install_dir}/bin/hadoop fs -put ${hadoop_install_dir}/conf/local_distributed/* ${hadoop_work_dir}/input >& /dev/null
# Fix hard-coding yo...
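# [Sketch for the note above] resolve the examples jar once; assumes exactly
# one hadoop-examples-*.jar ships at the top of the Hadoop install tree.
local examples_jar=$(/bin/ls ${hadoop_install_dir}/hadoop-examples-*.jar 2>/dev/null | head -1)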
${hadoop_install_dir}/bin/hadoop jar ${examples_jar} grep ${hadoop_work_dir}/input ${hadoop_work_dir}/output 'dfs[a-z.]+' >& /dev/null
echo -n "Testing Hadoop - Pseudo-Distributed Mode... "
[ -e ${hadoop_work_dir}/output/_SUCCESS ] && echo "[ PASSED ]" || echo "[ FAILED ]"
${hadoop_install_dir}/bin/hadoop fs -rmr ${hadoop_work_dir}/input >& /dev/null
${hadoop_install_dir}/bin/hadoop fs -rmr ${hadoop_work_dir}/output >& /dev/null
stop_hadoop_local_distributed
else
echo
echo "Hadoop not configured for Pseudo-distributed mode. Skipping test."
fi
}
clean_hadoop() {
doit="N"
if [ -e ${hadoop_install_dir} ]; then
read -e -p "remove mapreduce framework Hadoop? (${hadoop_install_dir}) [y/N]: " doit
if [ "doit" = "Y" ] || [ "$doit" = "y" ]; then
echo "removing ${hadoop_install_dir}"
rm -rf ${hadoop_install_dir}
[ $? != 0 ] && echo "ERROR: Unable to remove ${hadoop_install_dir}"
remove_env HADOOP_HOME
remove_install_log_entry hadoop
fi
fi
}
#####
# Zookeeper (Synchronization and quorum manager for clusters)
#####
setup_zookeeper() {
# Check ZooKeeper Version
echo
echo -n "Checking for Zookeeper >= ${zookeeper_min_version}"
if [ -e ${zookeeper_install_dir} ] && [ -x ${zookeeper_install_dir}/bin/zkServer.sh ]; then
local zookeeper_current_version=$(/bin/ls ${zookeeper_install_dir} | egrep '^zookeeper.*jar$' | sed 's/[^0-9.]*\([0-9.]*\)\..*/\1/')
check_version_helper $zookeeper_current_version ${zookeeper_min_version} ${zookeeper_max_version}
[ $? == 0 ] && (( ! force_install )) && echo " [OK]" && return 0
else
echo
echo "No Zookeeper installation detected"
fi
echo
echo "*****************************"
echo "Setting up Zookeeper ${zookeeper_version}"
echo "*****************************"
echo
# Retrieve Zookeeper Distribution File
local zookeeper_dist_file=${zookeeper_dist_url##*/}
local zookeeper_dist_dir=$(echo ${zookeeper_dist_file} | awk 'gsub(/('$compress_extensions')/,"")')
# Check for empty distribution file (size 0)
# TODO
if [ ! -e ${install_prefix}/${zookeeper_dist_dir} ]; then
echo "Don't see Zookeeper distribution directory ${zookeeper_dist_dir}"
wget -O "${install_prefix}/${zookeeper_dist_file}" ${zookeeper_dist_url}
[ $? != 0 ] && echo " ERROR: Could not download Zookeeper: ${zookeeper_dist_file}" && popd && checked_done 1
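# [Sketch, mirroring the Hadoop guard above] refuse to unpack a zero-length download:
[ -s "${install_prefix}/${zookeeper_dist_file}" ] || { echo " ERROR: Empty distribution file: ${zookeeper_dist_file}"; checked_done 1; }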
echo "Unpacking ${zookeeper_dist_file}..."
tar xzf ${install_prefix}/${zookeeper_dist_file} -C ${install_prefix}
mv "${install_prefix}/${zookeeper_dist_dir}" ${zookeeper_install_dir}
[ $? != 0 ] && echo " ERROR: Could not extract Zookeeper: ${zookeeper_dist_file}" && popd && checked_done 1
fi
# Add Zookeeper home to environment and install manifest
write_env_zookeeper
write_zookeeper_install_log_entry
}
write_env_zookeeper() {
echo "export ZOOKEEPER_HOME=${zookeeper_install_dir}" >> ${envfile}
dedup ${envfile} && source ${envfile}
return 0
}
write_zookeeper_install_log_entry() {
local entry="$(date ${date_format}) esg-compute-tools:zookeeper=${zookeeper_version} ${zookeeper_install_dir}"
echo ${entry} >> ${install_manifest}
dedup ${install_manifest}
return 0
}
config_zookeeper() {
echo
doconfig="N"
if [ -e ${zookeeper_install_dir} ]; then
read -e -p "Configure Zookeeper Installation? [y/N]: " doconfig
if [ "$doconfig" = "Y" ] || [ "$doconfig" = "y" ]; then
echo "Creating Zookeeper working directories..."
if [ -e ${zookeeper_workdir} ]; then
rm -rf ${zookeeper_workdir}
fi
mkdir ${zookeeper_workdir}
mkdir ${zookeeper_workdir}/standalone
mkdir ${zookeeper_workdir}/local_replication
mkdir ${zookeeper_workdir}/cluster_replication
echo "Configuring Zookeeper for standalone local operation..."
echo "tickTime=2000" >> ${zookeeper_confdir}/zoo_standalone.cfg
echo "dataDir=${zookeeper_workdir}/standalone" \
>> ${zookeeper_confdir}/zoo_standalone.cfg
echo "clientPort=$zookeeper_client_port" \
>> ${zookeeper_confdir}/zoo_standalone.cfg
# This is a bit wordy...but what else to do? Engage beast mode! We
# need a separate configuration file for each locally replicated
# Zookeeper instance. For now, just create 5 local replicas, though
# TODO make this a user-provided configuration value later.
# 5 is a good quorum size because Zookeeper only tolerates failures
# that still leave a majority alive: a 5-replica quorum survives two
# failures (three live instances still form a majority), while a
# 6-member quorum also survives only two, so the sixth member buys
# nothing.
echo "Configuring Zookeeper for locally replicated operation..."
local default_client_port=$zookeeper_client_port
local start_host=2888
local end_host=3888
local quorum_size=5
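# [Sketch] the majority rule from the comment above as arithmetic: a quorum
# of N tolerates floor((N-1)/2) failures, so 5 -> 2 and 6 -> still 2.
echo "Quorum of ${quorum_size} tolerates $(( (quorum_size - 1) / 2 )) failed server(s)"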
local zk_replicated_config=${zookeeper_confdir}/zoo_local_replicated
local zk_datadir=${zookeeper_workdir}/local_replication/local
for (( n=1; n <= $quorum_size; n++ ))
do
# Generate configuration file for this server instance
local nth_zk_replicated_config=${zk_replicated_config}_$n.cfg
local nth_zk_datadir=${zk_datadir}_$n
local nth_client_port=$((default_client_port++))
# Create a data directory for this server instance, and a myid
# file to identify it in runtime. The file contains a single
# ASCII character for the server's id
mkdir $nth_zk_datadir
echo "$n" >> $nth_zk_datadir/myid
echo "tickTime=2000" >> $nth_zk_replicated_config
echo "dataDir=$nth_zk_datadir" >> $nth_zk_replicated_config
echo "clientPort=$nth_client_port" >> $nth_zk_replicated_config
echo "initLimit=5" >> $nth_zk_replicated_config
echo "syncLimit=2" >> $nth_zk_replicated_config
# Every Zookeeper instance must be aware of its fellow servers.
# This coordination is achieved by writing a
# server.N=host:quorum_port:election_port line to each server's
# configuration.
local zhost_1=$start_host
local zhost_2=$end_host
for (( zk=1; zk <= $quorum_size; zk++ ))
do
echo "server.$zk=localhost:$((zhost_1++)):$((zhost_2++))" \
>> $nth_zk_replicated_config
done
done
# TODO Configure Zookeeper file for Mesos here
# TODO echo Configuring Zookeeper for cluster operation
# This will be a lot like local replicated setup, except across
# the cluster. More testing will be needed for that.
else
# Inform user of skipped configuration step
echo "Skipping Zookeeper Configuration..."
fi
fi
}
test_zookeeper() {
# Basic zookeeper test for client connection
echo
echo "Zookeeper Test 1: Standalone Local Server/Client Connection"
start_zookeeper_server "${zookeeper_install_dir}/conf/zoo_standalone.cfg"
${zookeeper_install_dir}/bin/zkCli.sh -server 127.0.0.1:2181 ls / >& /dev/null
zkret=$?
echo -n "Tested Zookeeper local standalone configuration... "
[ $zkret == 0 ] && echo " [ PASSED ]" || echo " [ FAILED ]"
stop_zookeeper_server "${zookeeper_install_dir}/conf/zoo_standalone.cfg"
# Zookeeper test for locally replicated cluster
echo
echo "Zookeeper Test 2: Local Replication Test"
echo -n "Tested Zookeeper Local Replication with N Quorum Members"
echo "[ TBD ]"
# TODO Cluster replication test
}
start_zookeeper_server() {
# Starts Zookeeper server with a given configuration file
echo "[ Starting Zookeeper... ]"
${zookeeper_install_dir}/bin/zkServer.sh start $1 >& /dev/null
[ $? == 0 ] && echo "[ Zookeeper running from configuration: $1 ]" || echo "[Zookeeper failed to run, see configuration: $1 ]"
}
stop_zookeeper_server() {
# TODO use command `ps -elfwww | grep -i zookeeper` to parse out currently
# running configuration files
${zookeeper_install_dir}/bin/zkServer.sh stop $1 >& /dev/null
[ $? == 0 ] && echo "[Stopping Zookeeper ... STOPPED]" \
|| echo "[Stopping Zookeeper ... FAILED, check your configuration]"
}
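# [Sketch for the TODO in stop_zookeeper_server] best-effort list of the
# config file behind each running Zookeeper, pulled from the process table;
# assumes zkServer.sh leaves the .cfg path on the java command line.
list_running_zookeeper_configs() {
    ps -elfwww | grep -i '[z]ookeeper' | grep -o '[^ ]*\.cfg' | sort -u
}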
clean_zookeeper() {
doit="N"
if [ -e ${zookeeper_install_dir} ]; then
read -e -p "remove cluster management tool Zookeeper? (${zookeeper_install_dir}) [y/N]: " doit
if [ "doit" = "Y" ] || [ "$doit" = "y" ]; then
echo "removing ${zookeeper_install_dir}"
rm -rf ${zookeeper_install_dir}
[ $? != 0 ] && echo "ERROR: Unable to remove ${zookeeper_install_dir}"
remove_env ZOOKEEPER_HOME
remove_install_log_entry zookeeper
fi
fi
}
#####
# Core Methods
#####
clean_compute_tools() {
# TODO: A few of these computation frameworks rely on one another,
# so uninstalling portions of this suite may leave parts unstable. Some
# sort of sanity check would be useful here.
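# [Sketch of such a sanity check] warn before removing a dependency that a
# sibling tool still uses (Spark runs on Mesos):
[ -e ${spark_install_dir} ] && [ -e ${mesos_install_dir} ] && \
echo "NOTE: Spark is still installed and depends on Mesos; consider removing Spark first."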
clean_spark
clean_mesos
clean_hadoop
clean_zookeeper
}
test_compute_tools() {
# Method to run test suites of all compute tools.
echo
echo "---------------------------------"
echo " Testing ESGF Node Compute Tools "
echo "---------------------------------"
echo
test_hadoop
test_zookeeper
test_mesos
test_spark
}
config_compute_tools() {
# Method to run configuration methods of all compute tools.
echo
echo "---------------------------------"
echo " Configuring ESGF Node Compute Tools "
echo "---------------------------------"
echo
config_hadoop
config_zookeeper
config_mesos
config_spark
}
setup_compute_tools() {
echo
echo "-------------------------------------------------------------"
echo "Installing ESGF Node Compute Tools {Hadoop, Zookeeper, Mesos, Spark}"
echo "-------------------------------------------------------------"
# Install order matters; the sequence below respects inter-tool dependencies:
# 1. Hadoop
# 2. Zookeeper
# 3. Mesos
# 4. Spark (requires Mesos for distributed computation)
# Future: Yarn (currently on Hadoop master), Cascalog (Clojure query language for Hadoop)
echo
echo "---------------------------------"
echo " Installing Hadoop - Latest Stable Version"
echo "---------------------------------"
setup_hadoop
config_hadoop
test_hadoop
echo
echo "---------------------------------"
echo " Installing Zookeeper"
echo "---------------------------------"
setup_zookeeper
config_zookeeper
test_zookeeper
echo
echo "---------------------------------"
echo " Installing Mesos and Mesos-Compatible Hadoop"
echo "---------------------------------"
setup_mesos
config_mesos
test_mesos
echo
echo "---------------------------------"
echo " Installing Spark"
echo "---------------------------------"
setup_spark
config_spark
test_spark
}
if [[ "$BASH_SOURCE" == "$0" ]]
then
setup_compute_tools
fi