#!/bin/bash

#####
# esg-compute-tools: ESGF Node Application Stack - Compute Tools
# description: Compute tools installer for the ESGF Node application stack
#
#****************************************************************************
#*                                                                          *
#*   Organization: Lawrence Livermore National Lab (LLNL)                   *
#*    Directorate: Computation                                              *
#*     Department: Computing Applications and Research                      *
#*       Division: S&T Global Security                                      *
#*         Matrix: Atmospheric, Earth and Energy Division                   *
#*        Program: PCMDI                                                    *
#*        Project: Earth Systems Grid Fed (ESGF) Node Software Stack        *
#*   First Author: Eugenia Gabrielova (gabrielov1@llnl.gov)                 *
#*                                                                          *
#****************************************************************************
#*                                                                          *
#*   Copyright (c) 2009, Lawrence Livermore National Security, LLC.         *
#*   Produced at the Lawrence Livermore National Laboratory                 *
#*   Written by: Gavin M. Bell (gavin@llnl.gov),                            *
#*               Eugenia Gabrielova (gabrielov1@llnl.gov)                   *
#*   LLNL-CODE-420962                                                       *
#*                                                                          *
#*   All rights reserved. This file is part of the:                         *
#*   Earth System Grid Fed (ESGF) Node Software Stack, Version 1.0          *
#*                                                                          *
#*   For details, see http://esgf.org/                                      *
#*   Please also read this link                                             *
#*   http://esgf.org/LICENSE                                                *
#*                                                                          *
#*   * Redistribution and use in source and binary forms, with or           *
#*     without modification, are permitted provided that the following      *
#*     conditions are met:                                                  *
#*                                                                          *
#*   * Redistributions of source code must retain the above copyright       *
#*     notice, this list of conditions and the disclaimer below.            *
#*                                                                          *
#*   * Redistributions in binary form must reproduce the above copyright    *
#*     notice, this list of conditions and the disclaimer (as noted below)  *
#*     in the documentation and/or other materials provided with the       *
#*     distribution.                                                        *
#*                                                                          *
#*   Neither the name of the LLNS/LLNL nor the names of its contributors    *
#*   may be used to endorse or promote products derived from this          *
#*   software without specific prior written permission.                    *
#*                                                                          *
#*   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS    *
#*   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT      *
#*   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS      *
#*   FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL LAWRENCE    *
#*   LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR     *
#*   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,           *
#*   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT       *
#*   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF       *
#*   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND    *
#*   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,     *
#*   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT     *
#*   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF     *
#*   SUCH DAMAGE.                                                           *
#*                                                                          *
#****************************************************************************

#####
# Description: Installer for ESG Compute Tools
#     Implemented Tools: Spark, Hadoop, Zookeeper
#     Tools in Progress:
#       * Yarn (Hadoop Streaming)
#       * Mesos (Cluster Management)
#       * Cascalog (Clojure-based query language for Hadoop)
#       * Scoobi (Scala framework for Hadoop)
# Authors: Eugenia Gabrielova {gabrielov1@llnl.gov, genia.likes.science@gmail.com}
####

#####
# uses: git, tar, wget
#####

#--------------
# User Defined / Settable (public)
#--------------
install_prefix=${install_prefix:-"/usr/local"}
esg_root_dir=${esg_root_dir:-${ESGF_HOME:-"/esg"}}
DEBUG=${DEBUG:-0}
git_placeholder="$install_prefix/git"
git_exec_path_param="--exec-path=$git_placeholder/libexec/git-core"
java_placeholder="$install_prefix/java"
java_install_path_config_param="--with-java-home=$java_placeholder/bin/java"
compress_extensions=".tar.gz|.tar.bz2|.tgz|.bz2|.tar"
envfile="/etc/esg.env"
esg_functions_file=./esg-functions
esg_compute_languages_file=./esg-compute-languages
install_manifest=${install_manifest:-"${esg_root_dir}/esgf-install-manifest"}
# TODO: esg_config_dir - create a directory called esg_config_compute in esg_config_dir
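# The "settable" variables above (and the version variables below) use the
# ${var:-default} / ${var:=default} idioms, so any of them can be overridden
# from the caller's environment without editing this file. A hypothetical
# invocation (values are illustrative, not recommendations), assuming the
# script is invoked as ./esg-compute-tools:
#
#   install_prefix=/opt/esgf hadoop_version=1.0.4 force_install=1 ./esg-compute-tools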
#--------------------------------
# External programs' versions
#--------------------------------
zookeeper_version=${zookeeper_version:="3.3.5"}
zookeeper_min_version=${zookeeper_min_version:="3.3.3"}
zookeeper_max_version=${zookeeper_max_version:="3.4.3"}
mesos_version=${mesos_version:="0.9.0"}
mesos_min_version=${mesos_min_version:="0.9.0"}
spark_version=${spark_version:="0.5.0"}
spark_min_version=${spark_min_version:="0.5.0"}
hadoop_version=${hadoop_version:="1.0.3"}
hadoop_min_version=${hadoop_min_version:="1.0.1"}
hadoop_max_version=${hadoop_max_version:="1.0.4"}

#--------------------------------
# External programs' script variables
#--------------------------------
mesos_install_dir=${MESOS_HOME:-${install_prefix}/mesos}
mesos_git_url="git://git.apache.org/mesos.git"
mesos_build_dir=${MESOS_BUILD_HOME:-${install_prefix}/mesos} # TODO should become .../mesos/build once out-of-tree builds are in place
spark_install_dir=${SPARK_HOME:-${install_prefix}/spark}
spark_git_url="git://github.com/mesos/spark.git"
zookeeper_install_dir=${ZOOKEEPER_HOME:-${install_prefix}/zookeeper}
zookeeper_dist_url=http://www.gtlib.gatech.edu/pub/apache/zookeeper/zookeeper-${zookeeper_version}/zookeeper-${zookeeper_version}.tar.gz
hadoop_install_dir=${HADOOP_HOME:-${install_prefix}/hadoop}
hadoop_dist_url=http://www.gtlib.gatech.edu/pub/apache/hadoop/common/hadoop-${hadoop_version}/hadoop-${hadoop_version}.tar.gz
hadoop_work_dir=${hadoop_work_dir:-${install_prefix}/hadoop_working}

#-------------------------------
# External programs' configuration variables
#-------------------------------
zookeeper_client_port=2181
zookeeper_workdir=${zookeeper_install_dir}/zookeeper_working
zookeeper_confdir=${zookeeper_install_dir}/conf

#-------------------------------
# Internal script variables
#-------------------------------
date_format="+%Y_%m_%d_%H%M%S"

[ -e "${envfile}" ] && source ${envfile} && ((VERBOSE)) && printf "sourcing environment from: ${envfile} \n"
[ -e "${esg_functions_file}" ] && source ${esg_functions_file} && ((VERBOSE)) && printf "sourcing from: ${esg_functions_file} \n"

#####
# Mesos (Cluster manager for resource sharing across distributed applications)
#####

setup_mesos() {
    # TODO Build Mesos in a dedicated build directory on the install tree

    # Check for an existing Mesos installation and its version
    echo
    echo -n "Checking for Mesos >= ${mesos_min_version}"
    if [ -e ${mesos_install_dir} ]; then
        # An unconfigured Mesos checkout cannot report its version
        if [ ! -e ${mesos_install_dir}/configure ]; then
            (cd ${mesos_install_dir} && ./bootstrap)
        fi
        local mesos_current_version=$(${mesos_install_dir}/configure --version | head -1 | awk '{print $3}')
        check_version_helper $mesos_current_version ${mesos_min_version}
        [ $? == 0 ] && (( ! force_install )) && echo " [OK]" && return 0
    else
        echo
        echo "No Mesos installation detected"
    fi

    echo
    echo "**********************************"
    echo "Setting Up Mesos ${mesos_version}"
    echo "**********************************"
    echo

    # Retrieve the Mesos source and build it, configured for the ESGF tools
    # Uses: Hadoop, Zookeeper, Java, Python
    git ${git_exec_path_param} clone ${mesos_git_url} ${mesos_install_dir}
    [ $? != 0 ] && echo " ERROR: Could not clone Mesos: ${mesos_git_url}" && checked_done 1
    (cd ${mesos_install_dir} && ./bootstrap && ./configure --with-java=${java_placeholder} --disable-python --disable-webui && make)
    # TODO let's make this config tastier...
    # TODO "make check" hangs on the zookeeper install
    # TODO Mesos /usr/local configuration
    # TODO ./configure with Hadoop, Zookeeper, Java, Python from /usr/local

    # Add Mesos home to environment and install manifest
    write_env_mesos
    write_mesos_install_log_entry
}

write_env_mesos() {
    echo "export MESOS_HOME=${mesos_install_dir}" >> ${envfile}
    dedup ${envfile} && source ${envfile}
    return 0
}

write_mesos_install_log_entry() {
    local entry="$(date ${date_format}) esg-compute-tools:mesos=${mesos_version} ${mesos_install_dir}"
    echo ${entry} >> ${install_manifest}
    dedup ${install_manifest}
    return 0
}
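# A minimal sketch of the deploy configuration that config_mesos (below) still
# lists as TODOs. The mesos-env.sh filename and the LIBPROCESS_IP variable
# come straight from those TODO notes; the "masters"/"slaves" file names and
# the single-node layout are assumptions to check against the deploy scripts
# shipped with this Mesos checkout. Not called anywhere yet.
write_mesos_deploy_config_sketch() {
    local deploy_dir=${mesos_build_dir}/deploy
    mkdir -p ${deploy_dir}
    # Single-node layout: this host acts as both master and slave (assumption)
    local my_ip=$(get_config_ip eth0)  # esg-functions helper used elsewhere in this script
    echo "${my_ip}" > ${deploy_dir}/masters
    echo "${my_ip}" > ${deploy_dir}/slaves
    # LIBPROCESS_IP pins the interface libprocess binds to (from the TODO)
    echo "export LIBPROCESS_IP=${my_ip}" >> ${deploy_dir}/mesos-env.sh
    dedup ${deploy_dir}/mesos-env.sh
    return 0
}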
config_mesos() {
    # The most efficient way to start and stop Mesos is with its built-in
    # deploy scripts, which require some configuration
    # (see the write_mesos_deploy_config_sketch above).
    #
    # [TODO] Mesos and Hadoop config
    # [TODO] List of Mesos hosts and slaves
    # [TODO] LIBPROCESS_IP in MESOS_HOME/deploy/mesos-env.sh
    # [TODO] Zookeeper URL configuration for the running Mesos master
    #        (required for fault tolerance mode)

    # First we check for a Hadoop configuration - if it exists, request a
    # reconfigure; if not, proceed automatically.
    # TODO Check existence of the relevant Hadoop files
    # The following is a bit of a hack but, in polite terms, a "work in progress".
    echo
    local doconfig="N"
    if [ -e ${mesos_install_dir}/hadoop ]; then
        read -e -p "Configure Mesos-Compatible Hadoop Installation? [y/N]: " doconfig
        # Configure the Hadoop environment for Mesos
        if [ "$doconfig" = "Y" ] || [ "$doconfig" = "y" ]; then
            echo "Configuring Hadoop for Mesos..."
            #
        # Inform user of skipped configuration step
        else
            echo "Skipping Mesos Hadoop Configuration..."
        fi
    fi
}

test_mesos() {
    # Currently running Mesos + Zookeeper with custom deploy methods, tested here.
    # Test 1: Mesos + Spark
    # Test 2: Mesos + Hadoop
    # Test 3: Mesos + Hadoop + Spark
    echo
    echo "Test 1: Mesos with Spark Framework, [TODO Zookeeper Quorum Management]"
    run_mesos_stop_cluster
    stop_mesos_zookeeper
    start_mesos_zookeeper
    # TODO Yuck, hard-coding (see the build_zk_url sketch below)
    #local zkflag_spark=zk://localhost:2181,localhost:2182,localhost:2183,localhost:2184,localhost:2185/znode
    #local sparkResult=$(${spark_install_dir}/run spark.examples.SparkPi $zkflag_spark) # >& /dev/null && echo $?)
    #echo -n "Tested Mesos Local Cluster with Spark... "
    #[ $sparkResult == 0 ] && echo "[ PASSED ]" || echo "[ FAILED ]"
    run_mesos_stop_cluster
    stop_mesos_zookeeper

    echo
    echo "Test 2: Mesos with Hadoop Framework, [TODO Zookeeper Quorum Management]"
    # Starts Zookeeper, starts Mesos, runs a Hadoop pseudo-distributed test job on Mesos
    # TODO Starts Zookeeper
    echo -n "Tested Mesos Local Cluster with Legacy Hadoop... "
    echo "[ TBD ]"

    echo
    echo "Test 3: Mesos with Hadoop, Spark, and [TODO Zookeeper Quorum Management]"
    # Starts Zookeeper, starts Mesos, runs a Spark job and a Hadoop job on Mesos
    echo -n "Tested Mesos Local Cluster with Hadoop, Spark... "
    echo "[ TBD ]"
}

start_mesos_zookeeper() {
    # Start a locally replicated set of Zookeeper servers
    local quorum=5
    for (( n=1; n <= $quorum; n++ ))
    do
        start_zookeeper_server ${zookeeper_confdir}/zoo_local_replicated_$n.cfg
    done

    # Start a Mesos master per locally replicated server+port
    local mesosport=5050
    for (( p=1; p <= $quorum; p++ ))
    do
        # TODO This should be written to a file in Zookeeper's configuration
        # (see the build_zk_url sketch below)
        local flag="--zk=zk://localhost:2181,localhost:2182,localhost:2183,localhost:2184,localhost:2185/znode --port=$((mesosport++))"
        run_mesos_start_master $flag
    done
    # TODO Start Zookeeper slaves
}

stop_mesos_zookeeper() {
    local quorum=5
    for (( n=1; n <= $quorum; n++ ))
    do
        stop_zookeeper_server ${zookeeper_confdir}/zoo_local_replicated_$n.cfg
    done
    stop_zookeeper_server ${zookeeper_confdir}/zoo_standalone.cfg
}
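# Sketch addressing the hard-coded zk:// connection strings in test_mesos and
# start_mesos_zookeeper above: derive the string from the quorum size and base
# client port instead. The trailing /znode chroot path is carried over from
# the existing hard-coded value. Not yet wired in.
build_zk_url() {
    local quorum=${1:-5}
    local base_port=${2:-$zookeeper_client_port}
    local url="zk://"
    local i
    for (( i=0; i < quorum; i++ ))
    do
        (( i > 0 )) && url+=","
        url+="localhost:$((base_port + i))"
    done
    echo "${url}/znode"
}
# Example: build_zk_url 5 2181
#   -> zk://localhost:2181,localhost:2182,localhost:2183,localhost:2184,localhost:2185/znode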
run_mesos_start_master() {
    # Takes as input any flags (besides --quiet) to pass to the master
    # TODO Start with Zookeeper URL, ESGF IP address
    echo "Starting Mesos Master"
    ${mesos_build_dir}/bin/mesos-master.sh --quiet "$@" &
    # NOTE: $? after '&' reflects the success of the fork, not of the daemon itself
    [ $? == 0 ] && echo "[ Mesos Master now running ]" || echo "[ Mesos Master failed to run ]"
}

run_mesos_start_slave() {
    # TODO Run with the ESGF IP address as the Mesos master URL
    # TODO Figure out correct memory resources
    echo "Starting Mesos Slave"
    local mesos_master_url=$(get_config_ip eth0)
    # Use the caller-supplied master URL, falling back to this host's IP
    ${mesos_build_dir}/bin/mesos-slave.sh "--master=${1:-$mesos_master_url}" "--resources=cpus:2;mem:1024" &
}

run_mesos_stop_cluster() {
    # Stop the master without parent shell termination notification
    master_pid=$(pgrep mesos-master)
    if [ $? == 0 ]; then
        echo -n "Stopping Mesos Master ..."
        kill $master_pid
        wait $master_pid 2>/dev/null
        echo "STOPPED"
    else
        echo "Mesos master not currently running"
    fi

    # Stop the slaves
    slave_pid=$(pgrep mesos-slave)
    if [ $? == 0 ]; then
        echo -n "Stopping Mesos Slaves ..."
        kill $slave_pid
        wait $slave_pid 2>/dev/null
        echo "STOPPED"
    else
        echo "Mesos slaves not currently running"
    fi
}

clean_mesos() {
    local doit="N"
    if [ -e ${mesos_install_dir} ]; then
        read -e -p "remove cluster management framework Mesos? (${mesos_install_dir}) [y/N]: " doit
        if [ "$doit" = "Y" ] || [ "$doit" = "y" ]; then
            echo "removing ${mesos_install_dir}"
            rm -rf ${mesos_install_dir}
            [ $? != 0 ] && echo "ERROR: Unable to remove ${mesos_install_dir}"
            remove_env MESOS_HOME
            remove_install_log_entry mesos
        fi
    fi
}

#####
# Spark (MapReduce framework in Scala, targeted at iterative applications
# that make use of working sets of data)
#####

setup_spark() {
    # Check for an existing Spark installation and its version
    echo
    echo -n "Checking for Spark >= ${spark_min_version}"
    if [ -e ${spark_install_dir} ]; then
        local spark_current_version=$(cd ${spark_install_dir} && sbt/sbt version | awk 'NR==5{split($3,array,"-")} END{print array[1]}')
        check_version_helper $spark_current_version ${spark_min_version}
        [ $? == 0 ] && (( ! force_install )) && echo " [OK]" && return 0
    else
        echo
        echo "No Spark installation detected"
    fi

    echo
    echo "*****************************"
    echo "Setting Up Spark ${spark_version}"
    echo "*****************************"
    echo

    # Retrieve and build Spark
    git ${git_exec_path_param} clone ${spark_git_url} ${spark_install_dir}
    # TODO Check for download errors or git failures
    echo "source ${envfile}" >> ${spark_install_dir}/conf/spark-env.sh
    # TODO Spark Java environment configuration
    (cd ${spark_install_dir} && sbt/sbt compile)

    # Add Spark home to environment and install manifest
    write_env_spark
    write_spark_install_log_entry
}

write_env_spark() {
    echo "export SPARK_HOME=${spark_install_dir}" >> ${envfile}
    dedup ${envfile} && source ${envfile}
    return 0
}

write_spark_install_log_entry() {
    local entry="$(date ${date_format}) esg-compute-tools:spark=${spark_version} ${spark_install_dir}"
    echo ${entry} >> ${install_manifest}
    dedup ${install_manifest}
    return 0
}

config_spark() {
    echo
    local doconfig="N"
    if [ -e ${spark_install_dir} ]; then
        read -e -p "Configure Spark Installation? [y/N]: " doconfig
        # Configure Spark environment, deployment, local/Mesos
        if [ "$doconfig" = "Y" ] || [ "$doconfig" = "y" ]; then
            echo "Configuring Spark..."
            # TODO Configure Spark here (see the write_spark_env_sketch below)
        # Inform user of skipped configuration step
        else
            echo "Skipping Spark Configuration..."
        fi
    fi
}
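# A hedged sketch of what config_spark's TODO might eventually write to
# conf/spark-env.sh. SCALA_HOME, MESOS_HOME, and SPARK_MEM are settings Spark
# builds of this vintage read from spark-env.sh (worth verifying against the
# checked-out revision); ${install_prefix}/scala assumes Scala was installed
# by the companion esg-compute-languages script. Not called anywhere yet.
write_spark_env_sketch() {
    local spark_env=${spark_install_dir}/conf/spark-env.sh
    echo "export SCALA_HOME=${install_prefix}/scala" >> ${spark_env}  # assumed install location
    echo "export MESOS_HOME=${mesos_install_dir}" >> ${spark_env}
    echo "export SPARK_MEM=1g" >> ${spark_env}                        # per-node memory cap (illustrative)
    dedup ${spark_env}
    return 0
}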
test_spark() {
    # Test 1: Run a "local" instance of Spark with 2 cores
    echo
    echo -n "Testing Spark - Local... "
    local sparkResult=$(cd ${spark_install_dir} && ./run spark.examples.SparkLR 'local[2]' >& /dev/null && echo $?)
    [ "$sparkResult" == "0" ] && echo "[ PASSED ]" || echo "[ FAILED ]"
}

clean_spark() {
    local doit="N"
    if [ -e ${spark_install_dir} ]; then
        read -e -p "remove iterative computation framework Spark? (${spark_install_dir}) [y/N]: " doit
        if [ "$doit" = "Y" ] || [ "$doit" = "y" ]; then
            echo "removing ${spark_install_dir}"
            rm -rf ${spark_install_dir}
            [ $? != 0 ] && echo "ERROR: Unable to remove ${spark_install_dir}"
            remove_env SPARK_HOME
            remove_install_log_entry spark
        fi
    fi
}

#####
# Hadoop (MapReduce distributed computing on HDFS)
#####

setup_hadoop() {
    echo
    # Check for an existing Hadoop installation and its version
    echo -n "Checking for Hadoop >= ${hadoop_min_version}"
    if [ -e ${hadoop_install_dir} ]; then
        local hadoop_current_version=$(export HADOOP_HOME_WARN_SUPPRESS="TRUE" && \
            ${hadoop_install_dir}/bin/hadoop version | head -1 | awk '{print $2}')
        local hadoop_version_number=${hadoop_current_version%-*}
        check_version_helper $hadoop_version_number ${hadoop_min_version} ${hadoop_max_version}
        [ $? == 0 ] && (( ! force_install )) && echo " [OK]" && return 0
    else
        echo
        echo "No Hadoop installation detected"
    fi

    echo
    echo "*****************************"
    echo "Setting up Hadoop ${hadoop_version}"
    echo "*****************************"
    echo

    # Retrieve the Hadoop distribution file
    local hadoop_dist_file=${hadoop_dist_url##*/}
    local hadoop_dist_dir=$(echo ${hadoop_dist_file} | awk 'gsub(/('$compress_extensions')/,"")')
    # TODO Check for an empty (size 0) distribution file - see the
    # verify_dist_file sketch below
    if [ ! -e ${hadoop_dist_dir} ]; then
        echo "Don't see Hadoop distribution directory ${hadoop_dist_dir}"
        wget -O "${install_prefix}/${hadoop_dist_file}" ${hadoop_dist_url}
        [ $? != 0 ] && echo " ERROR: Could not download Hadoop: ${hadoop_dist_file}" && checked_done 1
        echo "Unpacking ${hadoop_dist_file}..."
        tar xzf ${install_prefix}/${hadoop_dist_file} -C ${install_prefix}
        [ $? != 0 ] && echo " ERROR: Could not extract Hadoop: ${hadoop_dist_file}" && checked_done 1
        mv "${install_prefix}/${hadoop_dist_dir}" ${hadoop_install_dir}
    fi

    # Add Hadoop home to environment and install manifest
    write_env_hadoop
    write_hadoop_install_log_entry

    # Remove the Hadoop distribution file
    if [ -e "${install_prefix}/${hadoop_dist_file}" ]; then
        rm "${install_prefix}/${hadoop_dist_file}"
    fi

    # Create the working directory
    if [ ! -e ${hadoop_work_dir} ]; then
        mkdir ${hadoop_work_dir}
    fi
}

write_env_hadoop() {
    echo "export HADOOP_HOME_WARN_SUPPRESS=\"TRUE\"" >> ${envfile}
    echo "export HADOOP_HOME=${hadoop_install_dir}" >> ${envfile}
    echo "export JAVA_HOME=${install_prefix}/java" >> ${hadoop_install_dir}/conf/hadoop-env.sh
    dedup ${envfile} && source ${envfile}
    return 0
}

write_hadoop_install_log_entry() {
    local entry="$(date ${date_format}) esg-compute-tools:hadoop=${hadoop_version} ${hadoop_install_dir}"
    echo ${entry} >> ${install_manifest}
    dedup ${install_manifest}
    return 0
}
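# Sketch for the "check for empty distribution file (size 0)" TODOs in
# setup_hadoop (above) and setup_zookeeper (below): refuse to unpack a missing
# or zero-length download. Not yet wired into the setup functions.
verify_dist_file() {
    local dist_file=$1
    [ -s "${dist_file}" ] && return 0   # -s: file exists and has size > 0
    echo " ERROR: distribution file ${dist_file} is missing or empty" >&2
    return 1
}
# Intended use right after wget, e.g.:
#   verify_dist_file "${install_prefix}/${hadoop_dist_file}" || checked_done 1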
config_hadoop() {
    # This configuration method prompts the user for settings pertaining to the
    # type of Hadoop distribution (local, pseudo-distributed, cluster-distributed).
    # There is some info on getting Hadoop set up here:
    # http://hadoop.apache.org/common/#Getting+Started
    # TODO Fully distributed operation requires a pretty hefty amount of
    # configuration. Using the defaults Hadoop provides is probably best. This
    # guide will be used for fully distributed configuration:
    # http://hadoop.apache.org/common/docs/r0.20.2/cluster_setup.html

    # Initialize configuration
    echo
    local doconfig="N"
    if [ -e ${hadoop_install_dir} ]; then
        read -e -p "Configure Hadoop Installation? [y/N]: " doconfig
        # Configure Hadoop modes, HDFS, configuration
        if [ "$doconfig" = "Y" ] || [ "$doconfig" = "y" ]; then
            echo "Configuring Hadoop..."
            # This sets up both the stand-alone and pseudo-distributed configuration files
            pseudo_distributed_config_setup

            echo "Configure default Hadoop distribution mode for this node:"
            # Stand-alone operation doesn't really require configuration, as
            # Hadoop just runs as a .jar file. Some examples may require an
            # input and output directory, but it is probably overkill to
            # configure that here. Configuring Hadoop to run locally should be
            # enough.
            echo "[1] Stand-alone operation (good for debugging)"
            echo "[2] Pseudo-distributed operation on local node"
            read -e -p "(Default: Stand-Alone Operation [1]): " distribution_mode

            # Pseudo-distributed local operation
            if [ "$distribution_mode" = "2" ]; then
                # To learn more about pseudo-distributed operation or to
                # fine-tune your configuration, see the Hadoop documentation:
                # http://hadoop.apache.org/common/docs/r0.20.2/quickstart.html#PseudoDistributed
                # TODO Write configuration to hadoop-config in esg-env here...
                echo "Hadoop: Pseudo-Distributed operation is now default for this node."
            # Stand-alone local operation
            else
                # TODO Set standalone as the default mode in the ESG configuration for Hadoop
                echo "Hadoop: Stand-Alone Hadoop Operation is now default for this node."
            fi
        # Inform user of skipped configuration step
        else
            echo "Skipping Hadoop Configuration..."
        fi
    fi
}

start_hadoop_local_distributed() {
    # TODO Start Hadoop with configuration files from the ESG configuration directory
    echo "Hadoop: Starting hadoop in local distributed mode..."
    ${hadoop_install_dir}/bin/start-all.sh --config ${hadoop_install_dir}/conf/local_distributed >& /dev/null
}

stop_hadoop_local_distributed() {
    echo "Hadoop: Shutting down namenode and jobtracker..."
    ${hadoop_install_dir}/bin/stop-all.sh >& /dev/null
}
pseudo_distributed_config_setup() {
    # TODO Best practice would be to not keep these in Hadoop's install tree...
    # Create directories for the standalone and local_distributed
    # configurations. For now we destroy them first.
    rm -rf ${hadoop_install_dir}/conf/standalone
    rm -rf ${hadoop_install_dir}/conf/local_distributed
    rm -rf ${hadoop_install_dir}/conf/managed_cluster
    mkdir ${hadoop_install_dir}/conf/standalone
    mkdir ${hadoop_install_dir}/conf/local_distributed
    mkdir ${hadoop_install_dir}/conf/managed_cluster

    # Copy the default example config files to the new configuration directories
    cp ${hadoop_install_dir}/conf/*.xml ${hadoop_install_dir}/conf/standalone/
    cp ${hadoop_install_dir}/conf/hadoop-env.sh ${hadoop_install_dir}/conf/standalone/hadoop-env.sh
    cp ${hadoop_install_dir}/conf/slaves ${hadoop_install_dir}/conf/standalone/slaves
    cp ${hadoop_install_dir}/conf/hadoop-env.sh ${hadoop_install_dir}/conf/local_distributed/hadoop-env.sh
    cp ${hadoop_install_dir}/conf/slaves ${hadoop_install_dir}/conf/local_distributed/slaves
    cp ${hadoop_install_dir}/conf/hadoop-env.sh ${hadoop_install_dir}/conf/managed_cluster/hadoop-env.sh
    cp ${hadoop_install_dir}/conf/*.xml ${hadoop_install_dir}/conf/managed_cluster/
    cp ${hadoop_install_dir}/conf/slaves ${hadoop_install_dir}/conf/managed_cluster/slaves

    # Create the configuration files for pseudo-distributed mode
    echo '<?xml version="1.0"?>
<configuration>
    <property>
        <name>fs.default.name</name>
        <value>hdfs://localhost:9000</value>
    </property>
</configuration>' >> ${hadoop_install_dir}/conf/local_distributed/core-site.xml

    echo '<?xml version="1.0"?>
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
</configuration>' >> ${hadoop_install_dir}/conf/local_distributed/hdfs-site.xml

    echo '<?xml version="1.0"?>
<configuration>
    <property>
        <name>mapred.job.tracker</name>
        <value>localhost:9001</value>
    </property>
</configuration>' >> ${hadoop_install_dir}/conf/local_distributed/mapred-site.xml
}

test_hadoop() {
    echo
    # Hadoop Test 1 - Local (Standalone) Mode
    test_hadoop_standalone
    # Hadoop Test 2 - Local Pseudo-Distributed Mode
    test_hadoop_pseudo_distributed
}

test_hadoop_standalone() {
    # Hadoop Test - Local (Standalone) Mode, with the grep example job
    echo -n "Testing Hadoop - Local (Standalone) Mode... "
    mkdir ${hadoop_install_dir}/sandbox
    pushd ${hadoop_install_dir} >& /dev/null
    mkdir sandbox/input
    cp conf/*.xml sandbox/input
    bin/hadoop jar hadoop-examples-*.jar grep sandbox/input sandbox/output 'dfs[a-z.]+' >& /dev/null
    [ -e sandbox/output/_SUCCESS ] && echo "[ PASSED ]" || echo "[ FAILED ]"
    popd >& /dev/null
    rm -rf ${hadoop_install_dir}/sandbox
}

hadoop_temp_local_ssh() {
    # Set up local ssh: this functionality is duplicated elsewhere in the
    # esg-node installer but is needed temporarily for Hadoop pseudo-distributed
    # operation
    if [ ! -e ~/.ssh/id_dsa ]; then
        ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
        cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
    fi
}

test_hadoop_pseudo_distributed() {
    # Generate temporary ssh keys
    hadoop_temp_local_ssh

    echo "Begin Testing Hadoop - Pseudo-Distributed Mode... "
    if [ -e ${hadoop_install_dir}/conf/local_distributed ]; then
        #${hadoop_install_dir}/bin/hadoop namenode -format
        start_hadoop_local_distributed
        ${hadoop_install_dir}/bin/hadoop fs -mkdir ${hadoop_work_dir}/input >& /dev/null
        ${hadoop_install_dir}/bin/hadoop fs -put ${hadoop_install_dir}/conf/local_distributed/* ${hadoop_work_dir}/input >& /dev/null
        # TODO Fix the hard-coding here...
        ${hadoop_install_dir}/bin/hadoop jar ${hadoop_install_dir}/hadoop-examples-*.jar grep ${hadoop_work_dir}/input ${hadoop_work_dir}/output 'dfs[a-z.]+' >& /dev/null
        echo -n "Testing Hadoop - Pseudo-Distributed Mode... "
        [ -e ${hadoop_work_dir}/output/_SUCCESS ] && echo "[ PASSED ]" || echo "[ FAILED ]"
        ${hadoop_install_dir}/bin/hadoop fs -rmr ${hadoop_work_dir}/input >& /dev/null
        ${hadoop_install_dir}/bin/hadoop fs -rmr ${hadoop_work_dir}/output >& /dev/null
        stop_hadoop_local_distributed
    else
        echo
        echo "Hadoop not configured for Pseudo-Distributed mode. Skipping test."
    fi
}
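# The pseudo-distributed test above fires HDFS commands immediately after
# start-all.sh, but a freshly started namenode may still be in safe mode. A
# small helper that blocks until HDFS accepts writes; "dfsadmin -safemode wait"
# is part of the stock Hadoop 1.x CLI. Not yet wired into the test.
wait_for_hdfs() {
    ${hadoop_install_dir}/bin/hadoop dfsadmin -safemode wait >& /dev/null
}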
clean_hadoop() {
    local doit="N"
    if [ -e ${hadoop_install_dir} ]; then
        read -e -p "remove mapreduce framework Hadoop? (${hadoop_install_dir}) [y/N]: " doit
        if [ "$doit" = "Y" ] || [ "$doit" = "y" ]; then
            echo "removing ${hadoop_install_dir}"
            rm -rf ${hadoop_install_dir}
            [ $? != 0 ] && echo "ERROR: Unable to remove ${hadoop_install_dir}"
            remove_env HADOOP_HOME
            remove_install_log_entry hadoop
        fi
    fi
}

#####
# Zookeeper (Synchronization and quorum manager for clusters)
#####

setup_zookeeper() {
    # Check for an existing Zookeeper installation and its version
    echo
    echo -n "Checking for Zookeeper >= ${zookeeper_min_version}"
    if [ -e ${zookeeper_install_dir} ] && [ -x ${zookeeper_install_dir}/bin/zkServer.sh ]; then
        local zookeeper_current_version=$(/bin/ls ${zookeeper_install_dir} | egrep '^zookeeper.*jar$' | sed 's/[^0-9.]*\([0-9.]*\)\..*/\1/')
        check_version_helper $zookeeper_current_version ${zookeeper_min_version} ${zookeeper_max_version}
        [ $? == 0 ] && (( ! force_install )) && echo " [OK]" && return 0
    else
        echo
        echo "No Zookeeper installation detected"
    fi

    echo
    echo "*****************************"
    echo "Setting up Zookeeper ${zookeeper_version}"
    echo "*****************************"
    echo

    # Retrieve the Zookeeper distribution file
    local zookeeper_dist_file=${zookeeper_dist_url##*/}
    local zookeeper_dist_dir=$(echo ${zookeeper_dist_file} | awk 'gsub(/('$compress_extensions')/,"")')
    # TODO Check for an empty (size 0) distribution file - see the
    # verify_dist_file sketch above
    if [ ! -e ${zookeeper_dist_dir} ]; then
        echo "Don't see Zookeeper distribution directory ${zookeeper_dist_dir}"
        wget -O "${install_prefix}/${zookeeper_dist_file}" ${zookeeper_dist_url}
        [ $? != 0 ] && echo " ERROR: Could not download Zookeeper: ${zookeeper_dist_file}" && checked_done 1
        echo "Unpacking ${zookeeper_dist_file}..."
        tar xzf ${install_prefix}/${zookeeper_dist_file} -C ${install_prefix}
        [ $? != 0 ] && echo " ERROR: Could not extract Zookeeper: ${zookeeper_dist_file}" && checked_done 1
        mv "${install_prefix}/${zookeeper_dist_dir}" ${zookeeper_install_dir}
    fi

    # Add Zookeeper home to environment and install manifest
    write_env_zookeeper
    write_zookeeper_install_log_entry
}

write_env_zookeeper() {
    echo "export ZOOKEEPER_HOME=${zookeeper_install_dir}" >> ${envfile}
    dedup ${envfile} && source ${envfile}
    return 0
}

write_zookeeper_install_log_entry() {
    local entry="$(date ${date_format}) esg-compute-tools:zookeeper=${zookeeper_version} ${zookeeper_install_dir}"
    echo ${entry} >> ${install_manifest}
    dedup ${install_manifest}
    return 0
}
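# The quorum-size discussion in config_zookeeper (below) reduces to simple
# arithmetic: an ensemble of N servers tolerates floor((N-1)/2) failures,
# since a strict majority must stay alive. A one-liner capturing that:
zk_failures_tolerated() {
    echo $(( ($1 - 1) / 2 ))
}
# e.g. zk_failures_tolerated 5 -> 2, and zk_failures_tolerated 6 -> 2,
# which is why even ensemble sizes buy no extra fault tolerance.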
config_zookeeper() {
    echo
    local doconfig="N"
    if [ -e ${zookeeper_install_dir} ]; then
        read -e -p "Configure Zookeeper Installation? [y/N]: " doconfig
        if [ "$doconfig" = "Y" ] || [ "$doconfig" = "y" ]; then
            echo "Creating Zookeeper working directories..."
            if [ -e ${zookeeper_workdir} ]; then
                rm -rf ${zookeeper_workdir}
            fi
            mkdir ${zookeeper_workdir}
            mkdir ${zookeeper_workdir}/standalone
            mkdir ${zookeeper_workdir}/local_replication
            mkdir ${zookeeper_workdir}/cluster_replication

            echo "Configuring Zookeeper for standalone local operation..."
            echo "tickTime=2000" >> ${zookeeper_confdir}/zoo_standalone.cfg
            echo "dataDir=${zookeeper_workdir}/standalone" \
                >> ${zookeeper_confdir}/zoo_standalone.cfg
            echo "clientPort=$zookeeper_client_port" \
                >> ${zookeeper_confdir}/zoo_standalone.cfg

            # This is a bit wordy... but what else to do? Engage beast mode!
            # We need a separate configuration file for each locally replicated
            # Zookeeper instance. For now, just create 5 local replicas, though
            # TODO make this a user-provided configuration value later.
            # 5 is a good quorum size because Zookeeper only tolerates failures
            # that leave a strict majority alive: a 5-member quorum survives 2
            # failures (3 live members are still a majority), while a 6-member
            # quorum also only survives 2 (see zk_failures_tolerated above).
            echo "Configuring Zookeeper for locally replicated operation..."
            local default_client_port=$zookeeper_client_port
            local start_host=2888
            local end_host=3888
            local quorum_size=5
            local zk_replicated_config=${zookeeper_confdir}/zoo_local_replicated
            local zk_datadir=${zookeeper_workdir}/local_replication/local
            for (( n=1; n <= $quorum_size; n++ ))
            do
                # Generate the configuration file for this server instance
                local nth_zk_replicated_config=${zk_replicated_config}_$n.cfg
                local nth_zk_datadir=${zk_datadir}_$n
                local nth_client_port=$((default_client_port++))

                # Create a data directory for this server instance, and a
                # "myid" file to identify it at runtime. The file contains a
                # single ASCII character for the server's id.
                mkdir $nth_zk_datadir
                echo "$n" >> $nth_zk_datadir/myid

                echo "tickTime=2000" >> $nth_zk_replicated_config
                echo "dataDir=$nth_zk_datadir" >> $nth_zk_replicated_config
                echo "clientPort=$nth_client_port" >> $nth_zk_replicated_config
                echo "initLimit=5" >> $nth_zk_replicated_config
                echo "syncLimit=2" >> $nth_zk_replicated_config

                # Every Zookeeper instance must be aware of its fellow servers.
                # This coordination is achieved by writing a
                # server.N=host:port:port line for every member to each
                # server's configuration.
                local zhost_1=$start_host
                local zhost_2=$end_host
                for (( zk=1; zk <= $quorum_size; zk++ ))
                do
                    echo "server.$zk=localhost:$((zhost_1++)):$((zhost_2++))" \
                        >> $nth_zk_replicated_config
                done
            done

            # TODO Configure the Zookeeper file for Mesos here

            # TODO echo Configuring Zookeeper for cluster operation
            # This will be a lot like the locally replicated setup, except
            # across the cluster. More testing will be needed for that.
        else
            # Inform user of skipped configuration step
            echo "Skipping Zookeeper Configuration..."
        fi
    fi
}

test_zookeeper() {
    # Basic Zookeeper test for client connection
    echo
    echo "Zookeeper Test 1: Standalone Local Server/Client Connection"
    start_zookeeper_server "${zookeeper_install_dir}/conf/zoo_standalone.cfg"
    ${zookeeper_install_dir}/bin/zkCli.sh -server 127.0.0.1:2181 ls / quit >& /dev/null
    zkret=$?
    echo -n "Tested Zookeeper local standalone configuration... "
    [ $zkret == 0 ] && echo " [ PASSED ]" || echo " [ FAILED ]"
    stop_zookeeper_server "${zookeeper_install_dir}/conf/zoo_standalone.cfg"

    # Zookeeper test for a locally replicated cluster
    echo
    echo "Zookeeper Test 2: Local Replication Test"
    echo -n "Tested Zookeeper Local Replication with N Quorum Members"
    echo "[ TBD ]"

    # TODO Cluster replication test
}

start_zookeeper_server() {
    # Starts a Zookeeper server with the given configuration file
    echo "[ Starting Zookeeper... ]"
    ${zookeeper_install_dir}/bin/zkServer.sh start $1 >& /dev/null
    [ $? == 0 ] && echo "[ Zookeeper running from configuration: $1 ]" || echo "[ Zookeeper failed to run, see configuration: $1 ]"
}

stop_zookeeper_server() {
    # TODO Parse the currently running configuration files out of the process
    # table (see the list_running_zookeeper_configs sketch below)
    ${zookeeper_install_dir}/bin/zkServer.sh stop $1 >& /dev/null
    [ $? == 0 ] && echo "[ Stopping Zookeeper ... STOPPED ]" \
        || echo "[ Stopping Zookeeper ... FAILED, check your configuration ]"
}
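# Sketch for the TODO in stop_zookeeper_server above: discover which
# configuration files belong to currently running Zookeeper instances by
# parsing the process table. Assumes zkServer.sh passes the config file path
# as the last argument of the java command line (verify against this
# Zookeeper version before relying on it).
list_running_zookeeper_configs() {
    ps -elfwww | grep -i "[z]ookeeper" | awk '{print $NF}' | grep '\.cfg$'
}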
clean_zookeeper() {
    local doit="N"
    if [ -e ${zookeeper_install_dir} ]; then
        read -e -p "remove cluster management tool Zookeeper? (${zookeeper_install_dir}) [y/N]: " doit
        if [ "$doit" = "Y" ] || [ "$doit" = "y" ]; then
            echo "removing ${zookeeper_install_dir}"
            rm -rf ${zookeeper_install_dir}
            [ $? != 0 ] && echo "ERROR: Unable to remove ${zookeeper_install_dir}"
            remove_env ZOOKEEPER_HOME
            remove_install_log_entry zookeeper
        fi
    fi
}

#####
# Core Methods
#####

clean_compute_tools() {
    # TODO: A few of these computation frameworks rely on one another, so
    # uninstalling portions of this suite may leave the rest unstable. Some
    # sort of sanity check would be useful here.
    clean_spark
    clean_mesos
    clean_hadoop
    clean_zookeeper
}

test_compute_tools() {
    # Run the test suites of all compute tools
    echo
    echo "---------------------------------"
    echo " Testing ESGF Node Compute Tools "
    echo "---------------------------------"
    echo
    test_hadoop
    test_zookeeper
    test_mesos
    test_spark
}

config_compute_tools() {
    # Run the configuration methods of all compute tools
    echo
    echo "-------------------------------------"
    echo " Configuring ESGF Node Compute Tools "
    echo "-------------------------------------"
    echo
    config_hadoop
    config_zookeeper
    config_mesos
    config_spark
}

setup_compute_tools() {
    echo
    echo "---------------------------------------------------------------------"
    echo "Installing ESGF Node Compute Tools {Hadoop, Zookeeper, Mesos, Spark}"
    echo "---------------------------------------------------------------------"
    # The optimal install order, as below, depends on the following:
    # 1. Hadoop
    # 2. Zookeeper
    # 3. Mesos
    # 4. Spark (requires Mesos for distributed computation)
    # Future: Yarn (currently on the Hadoop master branch), Cascalog (Clojure
    # query language for Hadoop)

    echo
    echo "---------------------------------"
    echo " Installing Hadoop - Latest Stable Version"
    echo "---------------------------------"
    setup_hadoop
    config_hadoop
    test_hadoop

    echo
    echo "---------------------------------"
    echo " Installing Zookeeper"
    echo "---------------------------------"
    setup_zookeeper
    config_zookeeper
    test_zookeeper

    echo
    echo "---------------------------------"
    echo " Installing Mesos and Mesos-Compatible Hadoop"
    echo "---------------------------------"
    setup_mesos
    config_mesos
    test_mesos

    echo
    echo "---------------------------------"
    echo " Installing Spark"
    echo "---------------------------------"
    setup_spark
    config_spark
    test_spark
}

if [[ "$BASH_SOURCE" == "$0" ]]
then
    setup_compute_tools
fi