OS: Ubuntu 20.04 / 22.04 / 24.04 (server or desktop both work)
Every step below is run directly as root (the quickest, most brute-force way)
1. Preparation (run only once)
bash
# Update the package lists + install the required tools
apt update && apt install -y openjdk-8-jdk wget tar ssh rsync net-tools
# Confirm Java 8
java -version # must print openjdk version "1.8.0_xxx"
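Step 3 below hard-codes JAVA_HOME to the default Ubuntu install path of the amd64 OpenJDK 8 package, so it is worth confirming that path exists now (on other CPU architectures the directory name differs):
bash
# Should print /usr/lib/jvm/java-8-openjdk-amd64 on an amd64 machine
ls -d /usr/lib/jvm/java-8-openjdk-*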
2. Download and unpack Hadoop 3.2.4 in one go
bash
cd /opt
wget https://archive.apache.org/dist/hadoop/common/hadoop-3.2.4/hadoop-3.2.4.tar.gz
tar -xzvf hadoop-3.2.4.tar.gz
chown -R root:root hadoop-3.2.4
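A quick sanity check that the tarball unpacked where the later steps expect it:
bash
# Both files ship with the binary distribution; if either is missing, re-extract
ls /opt/hadoop-3.2.4/bin/hadoop /opt/hadoop-3.2.4/sbin/start-dfs.sh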
3. Hard-code all environment variables in one shot (takes effect permanently)
bash
cat > /etc/profile.d/hadoop.sh << 'EOF'
export HADOOP_HOME=/opt/hadoop-3.2.4
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
EOF
source /etc/profile.d/hadoop.sh
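To confirm the variables actually took effect in the current shell:
bash
hadoop version    # should report Hadoop 3.2.4
echo $JAVA_HOME   # should print /usr/lib/jvm/java-8-openjdk-amd64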
4. Fix all the "xxx_USER not defined" errors (these 5 lines are required)
bash
cat >> $HADOOP_HOME/etc/hadoop/hadoop-env.sh << 'EOF'
# Allow the daemons to be started as root
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
EOF
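Hadoop 3 refuses to start the HDFS and YARN daemons as root unless these *_USER variables name the account allowed to run each daemon, which is exactly what the "xxx_USER not defined" abort is complaining about. To confirm the five lines really were appended:
bash
tail -n 6 $HADOOP_HOME/etc/hadoop/hadoop-env.sh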
5. Configure core-site.xml and hdfs-site.xml (minimal single-node configuration)
bash
# core-site.xml
cat > $HADOOP_HOME/etc/hadoop/core-site.xml << 'EOF'
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
EOF
# hdfs-site.xml
cat > $HADOOP_HOME/etc/hadoop/hdfs-site.xml << 'EOF'
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///opt/hadoop-3.2.4/hdfs/namenode</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///opt/hadoop-3.2.4/hdfs/datanode</value>
  </property>
</configuration>
EOF
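Hadoop creates these local storage directories itself when the NameNode is formatted and the DataNode starts, but pre-creating them makes any typo in the paths obvious right away:
bash
mkdir -p /opt/hadoop-3.2.4/hdfs/namenode /opt/hadoop-3.2.4/hdfs/datanode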
6. Set up passwordless SSH to localhost (required even for a single node)
bash
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
ssh localhost exit # answer yes the first time; after that it logs in without a password
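If ssh localhost is refused outright (Connection refused), the SSH server may simply not be running yet; on Ubuntu the service is called ssh:
bash
systemctl enable --now ssh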
7. Format the NameNode + start Hadoop
bash
# run this only once
hdfs namenode -format
# start the services (from now on, these three commands are all you run each day)
start-dfs.sh
start-yarn.sh
# check that every daemon came up
jps
# Expected output:
# NameNode
# DataNode
# SecondaryNameNode
# ResourceManager
# NodeManager
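Besides jps, the web UIs give an at-a-glance health check (default Hadoop 3.x ports), and HDFS can be probed from the command line as well:
bash
# NameNode web UI:          http://<server-ip>:9870
# YARN ResourceManager UI:  http://<server-ip>:8088
hdfs dfsadmin -report | head -n 20   # should list 1 live datanode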
8. Verify that HDFS is working
bash
hdfs dfs -mkdir /test
hdfs dfs -put /etc/profile /test/
hdfs dfs -ls /test
# if the profile file shows up, HDFS is working
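As an optional end-to-end test, the examples jar bundled with Hadoop can estimate Pi. With this minimal configuration the job runs in the local MapReduce runner rather than on YARN, but it still reads and writes HDFS:
bash
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.4.jar pi 2 5
# The last line of output is the estimated value of Pi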
At this point, single-node Hadoop 3.2.4 is fully installed!
9. Optional: install Hive 3.1.3 while you are at it (just keep running the commands below)
bash
cd /opt
wget https://archive.apache.org/dist/hive/hive-3.1.3/apache-hive-3.1.3-bin.tar.gz
tar -xzvf apache-hive-3.1.3-bin.tar.gz
mv apache-hive-3.1.3-bin hive-3.1.3
# add it to the environment variables
echo 'export HIVE_HOME=/opt/hive-3.1.3' >> /etc/profile.d/hadoop.sh
echo 'export PATH=$PATH:$HIVE_HOME/bin' >> /etc/profile.d/hadoop.sh
source /etc/profile.d/hadoop.sh
# Fix the Guava version conflict (critical!)
cd $HIVE_HOME/lib
mv guava-19.0.jar guava-19.0.jar.bak 2>/dev/null || true
cp $HADOOP_HOME/share/hadoop/hdfs/lib/guava-27.0-jre.jar ./
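# Sanity check: only the 27.0-jre Guava should now be active in Hive's lib
ls $HIVE_HOME/lib | grep guava   # expect guava-27.0-jre.jar plus the renamed .bak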
# Create a minimal, clean hive-site.xml
cat > $HIVE_HOME/conf/hive-site.xml << 'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:derby:;databaseName=metastore_db;create=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>org.apache.derby.jdbc.EmbeddedDriver</value>
  </property>
  <property>
    <name>hive.metastore.warehouse.dir</name>
    <value>/user/hive/warehouse</value>
  </property>
</configuration>
EOF
# Create the HDFS directories Hive needs
hdfs dfs -mkdir -p /user/hive/warehouse /tmp
hdfs dfs -chmod g+w /user/hive/warehouse /tmp
# Initialize the metastore + start Hive
schematool -dbType derby -initSchema
hive # success means you land at the hive> prompt
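Two notes on the embedded Derby metastore used above: the metastore_db directory is created in whichever directory you run schematool and hive from, so always launch Hive from that same directory, and Derby only allows one session at a time, so quit the interactive shell before running anything else. A minimal metastore smoke test (smoke_test is just a throwaway name) could look like this:
bash
hive -e "CREATE TABLE IF NOT EXISTS smoke_test (id INT, msg STRING); SHOW TABLES; DROP TABLE smoke_test;"
# SHOW TABLES should list smoke_test before it is dropped again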