hosts
shell
[root@kafka01 hadoop-script]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.100.150 kafka01
192.168.100.151 kafka02
192.168.100.152 kafka03
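These hostname mappings need to be identical on every node so that kafka01, kafka02, and kafka03 resolve consistently. If the worker nodes do not have them yet, a minimal sketch (assuming the inventory.ini defined in the next section is already in place and SSH access works) is to push the master's copy with Ansible:
bash
# Sketch: distribute the master's /etc/hosts to all nodes so name resolution matches cluster-wide
ansible all -i inventory.ini -m copy -a "src=/etc/hosts dest=/etc/hosts" -b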
1. Preparation: Host Inventory File
hadoop-3.2.2.tar.gz has already been downloaded locally to /root/hadoop-3.2.2.tar.gz.
Create inventory.ini as the Ansible host inventory (it defines the node groups and their IPs):
ini
# inventory.ini
[kafka]
192.168.100.150 # kafka01 (Hadoop master node)
192.168.100.151 # kafka02 (Hadoop worker node)
192.168.100.152 # kafka03 (Hadoop worker node)
[work] # Hadoop worker-node group (kafka02 and kafka03)
192.168.100.151
192.168.100.152
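Before running the deployment steps below, it is worth confirming that Ansible can reach and escalate privileges on every host in this inventory; a minimal check, assuming passwordless SSH to all three nodes is already configured:
bash
# Sketch: verify SSH connectivity and privilege escalation for every inventory host
ansible all -i inventory.ini -m ping -b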
2. Ansible Batch Deployment Commands (run on the master node)
Run the following commands in order to install dependencies, distribute Hadoop, sync the configuration, and set the environment variables:
bash
# 1. Install dependencies on all nodes (rsync for config sync, Java as the Hadoop runtime dependency)
ansible all -i inventory.ini -m yum -a "name=rsync,java-11-openjdk-devel state=present" -b
# 2. Copy the Hadoop tarball to the master node (192.168.100.150) and extract it to /opt
# (hadoop-3.2.2.tar.gz was downloaded locally to /root/hadoop-3.2.2.tar.gz)
ansible 192.168.100.150 -i inventory.ini -m copy -a "src=/root/hadoop-3.2.2.tar.gz dest=/root/" -b
ansible 192.168.100.150 -i inventory.ini -m shell -a "tar -zxvf /root/hadoop-3.2.2.tar.gz -C /opt/" -b
# 3. Run the configuration script on the master node (generates the Hadoop config files)
cd /opt/hadoop-3.2.2/
bash onekey_install.sh
# 4. Sync the master's Hadoop directory to all worker nodes (the work group)
ansible work -i inventory.ini -m synchronize -a "src=/opt/hadoop-3.2.2/ dest=/opt/hadoop-3.2.2/ rsync_opts='--exclude=logs/* --exclude=pid/* --exclude=data/*'" -b
# 5. Set the Hadoop environment variables persistently on all nodes
ansible all -i inventory.ini -m lineinfile -a "path=/etc/profile line='export HADOOP_HOME=/opt/hadoop-3.2.2' state=present" -b
ansible all -i inventory.ini -m lineinfile -a "path=/etc/profile line='export PATH=\$PATH:\$HADOOP_HOME/bin:\$HADOOP_HOME/sbin' state=present" -b
# 6. Reload /etc/profile on all nodes (note: this only affects the ad-hoc shell Ansible spawns;
#    interactive login shells pick up the new variables from /etc/profile automatically)
ansible all -i inventory.ini -m shell -a "source /etc/profile" -b
3. Core Scripts: Configuration and Startup
The key scripts are consolidated below (corrected and checked for compatibility):
1. Configuration script: onekey_install.sh
(path: /opt/hadoop-3.2.2/onekey_install.sh)
bash
#!/bin/bash
# Determine the Hadoop installation root directory
HADOOP_HOME=$(pwd)
echo -e "\e[32mUpdating Hadoop configuration files\e[0m"
echo "Hadoop installation root: $HADOOP_HOME"
# Detect the Java installation path on the current node
if ! command -v java >/dev/null 2>&1; then
    echo -e "\e[31mError: no Java installation found\e[0m"
    exit 1
fi
JAVA_PATH=$(dirname "$(dirname "$(readlink -f "$(command -v java)")")")
# Create the directories Hadoop needs (PID, log, and data directories)
echo "Creating Hadoop directories"
mkdir -p "$HADOOP_HOME/pid" || { echo -e "\e[31mFailed to create PID directory\e[0m"; exit 1; }
mkdir -p "$HADOOP_HOME/logs" || { echo -e "\e[31mFailed to create log directory\e[0m"; exit 1; }
# Note: brace expansion does not happen inside double quotes, so keep the braces outside them
mkdir -p "$HADOOP_HOME/data"/{tmp,namenode,datanode} || { echo -e "\e[31mFailed to create data directories\e[0m"; exit 1; }
# Configure hadoop-env.sh (core environment variables)
cat > "$HADOOP_HOME/etc/hadoop/hadoop-env.sh" << EOF
export JAVA_HOME=$JAVA_PATH # automatically matches the Java path on this node
export HADOOP_HOME=$HADOOP_HOME
# Allow the root user to run the Hadoop components
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
# Keep PID and log files under the Hadoop root directory
export HADOOP_PID_DIR=$HADOOP_HOME/pid
export HADOOP_LOG_DIR=$HADOOP_HOME/logs
EOF
# Configure core-site.xml (HDFS core settings)
cat > "$HADOOP_HOME/etc/hadoop/core-site.xml" << EOF
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>$HADOOP_HOME/data/tmp</value>
    </property>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://kafka01:9820</value> <!-- master node hostname kafka01 -->
    </property>
</configuration>
EOF
# Configure hdfs-site.xml (HDFS storage and web UI settings)
cat > "$HADOOP_HOME/etc/hadoop/hdfs-site.xml" << EOF
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>2</value> <!-- replication factor (matches the number of worker nodes) -->
    </property>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>$HADOOP_HOME/data/namenode</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>$HADOOP_HOME/data/datanode</value>
    </property>
    <property>
        <name>dfs.namenode.http-address</name>
        <value>kafka01:9870</value> <!-- NameNode web address -->
    </property>
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>kafka02:9868</value> <!-- worker kafka02 serves as the SecondaryNameNode -->
    </property>
</configuration>
EOF
# Configure mapred-site.xml (MapReduce runs on YARN)
cat > "$HADOOP_HOME/etc/hadoop/mapred-site.xml" << EOF
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
EOF
# Configure yarn-site.xml (YARN resource management)
cat > "$HADOOP_HOME/etc/hadoop/yarn-site.xml" << EOF
<configuration>
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>kafka01</value> <!-- YARN master node -->
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value> <!-- enable the shuffle service -->
    </property>
</configuration>
EOF
# Configure workers (the list of worker nodes)
cat > "$HADOOP_HOME/etc/hadoop/workers" << EOF
kafka02
kafka03
EOF
echo -e "\e[32mHadoop配置完成!\e[0m"
2. Startup script: start-hadoop-cluster.sh
(path: /opt/hadoop-3.2.2/start-hadoop-cluster.sh)
bash
#!/bin/bash
# Make sure the script is run from the Hadoop root directory
HADOOP_HOME=$(pwd)
echo "Hadoop installation root: $HADOOP_HOME"
# Check that the configuration directory exists
if [ ! -d "$HADOOP_HOME/etc/hadoop" ]; then
    echo -e "\e[31mError: Hadoop configuration directory not found! Run the configuration script first\e[0m"
    exit 1
fi
# Add the Hadoop commands to PATH (in case the environment variables are not yet in effect)
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
# Function: format the NameNode (required before the first start)
format_namenode() {
    read -p "Format the NameNode? (required before the first start) [y/N]: " answer
    if [[ "$answer" == "y" || "$answer" == "Y" ]]; then
        echo -e "\e[33mFormatting NameNode...\e[0m"
        hdfs namenode -format || { echo -e "\e[31mNameNode formatting failed!\e[0m"; exit 1; }
        echo -e "\e[32mNameNode formatting complete!\e[0m"
    else
        echo -e "\e[33mSkipping NameNode formatting...\e[0m"
    fi
}
# Function: start HDFS and verify it (checks every node)
start_hdfs() {
    echo -e "\e[32mStarting HDFS...\e[0m"
    $HADOOP_HOME/sbin/start-dfs.sh || { echo -e "\e[31mHDFS failed to start!\e[0m"; exit 1; }
    # Verify the key HDFS processes
    echo -e "\e[33mChecking HDFS service status...\e[0m"
    if ansible 192.168.100.150 -i inventory.ini -m shell -a "jps | grep NameNode" -b >/dev/null; then
        echo -e "\e[32mkafka01: NameNode is running\e[0m"
    else
        echo -e "\e[31mkafka01: NameNode is not running!\e[0m"
    fi
    if ansible work -i inventory.ini -m shell -a "jps | grep DataNode" -b >/dev/null; then
        echo -e "\e[32mworker nodes: DataNode is running\e[0m"
    else
        echo -e "\e[31mworker nodes: DataNode is not running!\e[0m"
    fi
    if ansible 192.168.100.151 -i inventory.ini -m shell -a "jps | grep SecondaryNameNode" -b >/dev/null; then
        echo -e "\e[32mkafka02: SecondaryNameNode is running\e[0m"
    else
        echo -e "\e[31mkafka02: SecondaryNameNode is not running!\e[0m"
    fi
}
# Function: start YARN and verify it (checks every node)
start_yarn() {
    echo -e "\e[32mStarting YARN...\e[0m"
    $HADOOP_HOME/sbin/start-yarn.sh || { echo -e "\e[31mYARN failed to start!\e[0m"; exit 1; }
    # Verify the key YARN processes
    echo -e "\e[33mChecking YARN service status...\e[0m"
    if ansible 192.168.100.150 -i inventory.ini -m shell -a "jps | grep ResourceManager" -b >/dev/null; then
        echo -e "\e[32mkafka01: ResourceManager is running\e[0m"
    else
        echo -e "\e[31mkafka01: ResourceManager is not running!\e[0m"
    fi
    if ansible work -i inventory.ini -m shell -a "jps | grep NodeManager" -b >/dev/null; then
        echo -e "\e[32mworker nodes: NodeManager is running\e[0m"
    else
        echo -e "\e[31mworker nodes: NodeManager is not running!\e[0m"
    fi
}
# Function: print the web UI addresses
show_web_interfaces() {
    echo -e "\n\e[34m======== Hadoop Web UIs ========\e[0m"
    echo -e "\e[36mNameNode:          http://kafka01:9870\e[0m"
    echo -e "\e[36mResourceManager:   http://kafka01:8088\e[0m"
    echo -e "\e[36mSecondaryNameNode: http://kafka02:9868\e[0m"
    echo -e "\e[34m=================================\e[0m\n"
}
# Main flow
echo -e "\e[32mStarting the Hadoop cluster...\e[0m"
format_namenode
start_hdfs
start_yarn
show_web_interfaces
echo -e "\e[32mHadoop cluster startup complete!\e[0m"
4. Startup and Verification
- Run the startup script on the master node (192.168.100.150):
bash
cd /opt/hadoop-3.2.2
chmod +x start-hadoop-cluster.sh
./start-hadoop-cluster.sh
- Verify the cluster state:
bash
# Check the Java processes on all nodes
ansible all -i inventory.ini -m shell -a "jps" -b
# Test basic HDFS functionality
hdfs dfs -mkdir /test
hdfs dfs -put ./LICENSE.txt /test
hdfs dfs -ls /test
# Run the bundled example MapReduce job (estimates pi)
hadoop jar /opt/hadoop-3.2.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.2.jar pi 10 100
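As a further hedged check, the NameNode's own view of the cluster should list both DataNodes, and the test directory can be removed once verification is done:
bash
# Sketch: the report should show two live DataNodes (kafka02 and kafka03)
hdfs dfsadmin -report | grep -E "Live datanodes|^Name:"
# Sketch: optional cleanup of the test data
hdfs dfs -rm -r /test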