前置准备
-
准备三台机器
cat /etc/hosts
192.168.1.7 hadoop-master
192.168.1.11 hadoop-slave01
192.168.1.12 hadoop-slave02
-
Linux 环境
cat /etc/os-release
PRETTY_NAME="Ubuntu 24.10"
NAME="Ubuntu"
VERSION_ID="24.10"
VERSION="24.10 (Oracular Oriole)"
VERSION_CODENAME=oracular
ID=ubuntu
ID_LIKE=debian
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
UBUNTU_CODENAME=oracular
LOGO=ubuntu-logo
-
配置三台机器免密登录
参考
https://blog.csdn.net/xchenhao/article/details/105005312
-
在三台机器配置 JDK 环境
参考
https://blog.csdn.net/xchenhao/article/details/144866090
-
Hadoop
下载
https://hadoop.apache.org/releases.html
规划
| | hadoop-master | hadoop-slave01 | hadoop-slave02 |
|---|---|---|---|
| HDFS | NameNode DataNode | DataNode | Secondary NameNode |
| YARN | NodeManager JobHistoryServer | ResourceManager NodeManager | NodeManager |
操作
在 hadoop-master 执行
shell
# 下载 hadoop 压缩包
$ wget https://dlcdn.apache.org/hadoop/common/hadoop-3.4.1/hadoop-3.4.1.tar.gz
$ mkdir -p /opt
$ tar xvf hadoop-3.4.1.tar.gz -C /opt
$ cd /opt/hadoop-3.4.1
# 配置 HADOOP_HOME 环境变量
$ echo 'export HADOOP_HOME=/opt/hadoop-3.4.1' >> /etc/profile
# 将 hadoop 相关命令添加至 PATH 中
$ echo 'export PATH=$HADOOP_HOME/bin:$PATH' >> /etc/profile
$ source /etc/profile
# 查看 hadoop 版本
$ hadoop version
Hadoop 3.4.1
Source code repository https://github.com/apache/hadoop.git -r 4d7825309348956336b8f06a08322b78422849b1
Compiled by mthakur on 2024-10-09T14:57Z
Compiled on platform linux-x86_64
Compiled with protoc 3.23.4
From source with checksum 7292fe9dba5e2e44e3a9f763fce3e680
This command was run using /opt/hadoop-3.4.1/share/hadoop/common/hadoop-common-3.4.1.jar
# 创建 hadoop 数据目录
$ mkdir -p $HADOOP_HOME/data
在 hadoop-master 调整配置
-
$HADOOP_HOME/etc/hadoop/hadoop-env.sh
在文件中添加一行:export JAVA_HOME=/opt/jdk1.8.0_431
-
$HADOOP_HOME/etc/hadoop/core-site.xml
xml
<configuration>
<!-- HDFS settings -->
<!-- Address of the NameNode, used as the default file system URI. -->
<!-- NOTE(review): the port is pinned to 9820 here. The original note listed the historical defaults as 9000 (Hadoop 1.x), 8020 (2.x) and 9820 (3.x); 9820 seems to have been the default only in 3.0.0, with later 3.x releases back on 8020. Confirm against hdfs-default.xml for 3.4.1. Any free port works as long as every node uses this same file. -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop-master:9820</value>
</property>
<!-- Base directory for Hadoop data; other properties derive their paths from it. -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop-3.4.1/data</value>
</property>
<!-- Static user that the HDFS web UI acts as: xchenhao. -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>xchenhao</value>
</property>
</configuration>
$HADOOP_HOME/etc/hadoop/hdfs-site.xml
xml
<configuration>
<!-- HTTP address (host:port) of the NameNode daemon; see the daemon layout table in this guide. -->
<property>
<name>dfs.namenode.http-address</name>
<value>hadoop-master:9870</value>
</property>
<!-- HTTP address (host:port) of the SecondaryNameNode daemon; see the daemon layout table in this guide. -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop-slave02:9868</value>
</property>
</configuration>
$HADOOP_HOME/etc/hadoop/mapred-site.xml
xml
<configuration>
<!-- Run MapReduce jobs on YARN. -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Address (host:port) of the JobHistoryServer. -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop-master:10020</value>
</property>
<!-- Web UI address (host:port) of the JobHistoryServer. -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop-master:19888</value>
</property>
</configuration>
-
$HADOOP_HOME/etc/hadoop/workers
hadoop-master
hadoop-slave01
hadoop-slave02
-
$HADOOP_HOME/etc/hadoop/yarn-site.xml
xml
<configuration>
  <!-- Auxiliary service run by each NodeManager so MapReduce can shuffle. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <!-- Host that runs the ResourceManager. -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop-slave01</value>
  </property>
  <!-- Environment variables that containers inherit from the NodeManager. -->
  <property>
    <name>yarn.nodemanager.env-whitelist</name>
    <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
  </property>
  <!-- Enable log aggregation. -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <!-- URL of the log server. It must point at the JobHistoryServer web
       address, which this cluster runs on hadoop-master:19888 (see
       mapreduce.jobhistory.webapp.address in mapred-site.xml). -->
  <property>
    <name>yarn.log.server.url</name>
    <value>http://hadoop-master:19888/jobhistory/logs</value>
  </property>
  <!-- Keep aggregated logs for 7 days (604800 seconds). -->
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
</configuration>
将 hadoop-master 相关文件分发至 hadoop-slave01、hadoop-slave02
文件或目录 |
---|
/opt/hadoop-3.4.1 |
/etc/profile |
注:可使用基于 rsync 的分发脚本 xrsync.sh 进行文件分发,例如:xrsync.sh /opt/hadoop-3.4.1
脚本如下:
shell
#!/bin/bash
#
# xrsync.sh - copy files or directories to the same absolute location on
# every cluster node, using rsync over ssh.
#
# Usage: xrsync.sh PATH...
#
# Exit status: 1 when no PATH is given. Paths that do not exist locally are
# reported on stderr and skipped.

# 1. Require at least one path argument.
if [[ $# -lt 1 ]]; then
  echo "Not Enough Arguement!" >&2
  exit 1
fi

# 2. Visit every machine in the cluster.
for host in hadoop-master hadoop-slave01 hadoop-slave02; do
  echo "==================== $host ===================="

  # 3. Send each requested path in turn. "$@" keeps every argument as one
  #    word, so paths containing spaces or glob characters survive.
  for file in "$@"; do
    # 4. Skip anything that does not exist locally.
    if [[ ! -e "$file" ]]; then
      echo "$file does not exists!" >&2
      continue
    fi

    # 5. Resolve the physical parent directory (cd -P resolves symlinks) so
    #    the remote side receives a real absolute path.
    pdir=$(cd -P -- "$(dirname -- "$file")" && pwd) || continue

    # 6. Name of the file or directory itself.
    fname=$(basename -- "$file")

    # Create the parent directory remotely, then sync into it. printf %q
    # escapes the path for the remote shell that re-parses the command.
    ssh "$host" "mkdir -p -- $(printf '%q' "$pdir")"
    rsync -av -- "$pdir/$fname" "$host:$pdir"
  done
done
验证
(1)准备集群启/停脚本 xhadoop.sh
脚本如下:
shell
#!/bin/bash
#
# xhadoop.sh - start or stop the whole Hadoop cluster from one machine.
#
# Usage: xhadoop.sh start|stop
#
# HDFS and the JobHistoryServer are controlled on hadoop-master; YARN is
# controlled on hadoop-slave01. Components are stopped in the reverse of
# the start order.
#
# Remote commands are single-quoted on purpose: $HADOOP_HOME must be
# expanded by the remote shell after it has sourced /etc/profile, not by
# the local shell, where the variable may be unset or different.
#
# Exit status: 1 when the argument is missing or not start/stop.

if [[ $# -lt 1 ]]; then
  echo "No Args Input..."
  exit 1
fi

case "$1" in
  "start")
    echo " =================== 启动hadoop集群==================="
    echo " ---------------启动hdfs ---------------"
    ssh hadoop-master 'source /etc/profile && "$HADOOP_HOME"/sbin/start-dfs.sh'
    echo " ---------------启动yarn ---------------"
    ssh hadoop-slave01 'source /etc/profile && "$HADOOP_HOME"/sbin/start-yarn.sh'
    echo " ---------------启动historyserver ---------------"
    ssh hadoop-master 'source /etc/profile && "$HADOOP_HOME"/bin/mapred --daemon start historyserver'
    ;;
  "stop")
    echo " =================== 关闭hadoop集群==================="
    echo " ---------------关闭historyserver ---------------"
    ssh hadoop-master 'source /etc/profile && "$HADOOP_HOME"/bin/mapred --daemon stop historyserver'
    echo " ---------------关闭yarn ---------------"
    ssh hadoop-slave01 'source /etc/profile && "$HADOOP_HOME"/sbin/stop-yarn.sh'
    echo " ---------------关闭hdfs ---------------"
    ssh hadoop-master 'source /etc/profile && "$HADOOP_HOME"/sbin/stop-dfs.sh'
    ;;
  *)
    echo "Input Args Error..."
    exit 1
    ;;
esac
(2)启动集群
shell
$ xhadoop.sh start
=================== 启动hadoop集群===================
---------------启动hdfs ---------------
Starting namenodes on [hadoop-master]
Starting datanodes
Starting secondary namenodes [hadoop-slave02]
---------------启动yarn ---------------
Starting resourcemanager
Starting nodemanagers
---------------启动historyserver ---------------
(3)查看各机器 hadoop 组件进程
shell
$ jpsall.sh
=============== hadoop-master ===============
12245 DataNode
12549 NodeManager
12729 JobHistoryServer
12108 NameNode
12959 Jps
=============== hadoop-slave01 ===============
11584 DataNode
12368 Jps
11907 NodeManager
11775 ResourceManager
=============== hadoop-slave02 ===============
7446 SecondaryNameNode
7558 NodeManager
7309 DataNode
7791 Jps
脚本如下
shell
#!/bin/sh
#
# jpsall.sh - list the Java processes (via jps) on every cluster node.
#
# The remote command is single-quoted on purpose: $JAVA_HOME must be
# expanded by the remote shell after it has sourced /etc/profile, not by
# the local shell, where the variable may be unset or different.
for host in hadoop-master hadoop-slave01 hadoop-slave02
do
  echo "=============== $host ==============="
  ssh "$host" 'source /etc/profile && "$JAVA_HOME"/bin/jps'
done
(4)相关后台
NameNode 相关信息 Web 后台 | http://hadoop-master:9870 |
Yarn Web 后台 | http://hadoop-slave01:8088 |
HistoryServer 后台 | http://hadoop-master:19888 |
(5)停止集群
shell
$ xhadoop.sh stop
$ jpsall.sh