Overview
This setup can be deployed on networks inside mainland China.
This article uses Hadoop v3.1.1.
Data is persisted to volumes.
Currently unresolved issues
- Multiple NameNodes and DataNodes cannot be configured automatically. After asking around, the advice was that something like the Apache Doris operator is needed to automate multi-replica configuration. This is out of scope for now and remains TBD.
- The NameNode can fail to resolve DataNode hostnames. The only workaround is a special configuration setting, which is already applied; see the note on dfs.namenode.datanode.registration.ip-hostname-check in hdfs-site.xml in the ConfigMap.
Prerequisites
Using the official Hadoop image from Docker Hub is recommended.
An earlier article covers how to build a Hadoop container image; see https://juejin.cn/post/7575810396201959458
Deployment
Prepare the configuration files
Pay close attention to the file format; YAML is indentation-sensitive. You can also validate each manifest before applying it, as shown below.
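To catch formatting mistakes before they reach the cluster, each manifest can be validated client-side first (standard kubectl usage, nothing specific to this deployment):
bash
kubectl apply --dry-run=client -f configmap.yaml
kubectl apply --dry-run=client -f namenode.yaml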
configmap.yaml
yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop-config
  namespace: bigdata4
data:
  core-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop/tmpdata</value>
      </property>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://namenode-0.namenode.bigdata4:9000</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hadoop.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hadoop.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hue.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hue.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hive.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hive.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
      </property>
    </configuration>
  hdfs-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop/data/nn</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop/data/dn</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-address</name>
        <value>namenode-0.namenode.bigdata4:9000</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.http-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.replication</name>
        <value>1</value>
      </property>
      <property>
        <name>dfs.client.use.datanode.hostname</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
        <description>Set to false when the NameNode fails to reverse-resolve DataNode hostnames.</description>
      </property>
    </configuration>
  yarn-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>resourcemanager-0.resourcemanager.bigdata4</value>
      </property>
      <property>
        <name>yarn.resourcemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.resourcemanager.webapp.address</name>
        <value>0.0.0.0:8088</value>
      </property>
      <property>
        <name>yarn.nodemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
      </property>
      <property>
        <name>yarn.nodemanager.delete.debug-delay-sec</name>
        <value>600</value>
      </property>
      <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
      </property>
      <property>
        <name>yarn.acl.enable</name>
        <value>false</value>
      </property>
      <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME</value>
      </property>
    </configuration>
  mapred-site.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
      <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
      <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop</value>
      </property>
    </configuration>
  capacity-scheduler.xml: |
    <?xml version="1.0"?>
    <configuration>
      <property>
        <name>yarn.scheduler.capacity.maximum-applications</name>
        <value>10000</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
        <value>0.1</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.resource-calculator</name>
        <value>org.apache.hadoop.yarn.util.resource.DefaultResourceCalculator</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.queues</name>
        <value>default</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.capacity</name>
        <value>100</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.user-limit-factor</name>
        <value>1</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.maximum-capacity</name>
        <value>100</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.state</name>
        <value>RUNNING</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_submit_applications</name>
        <value>*</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_administer_queue</name>
        <value>*</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.node-locality-delay</name>
        <value>40</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.acl_application_max_priority</name>
        <value>*</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.root.default.maximum-application-lifetime</name>
        <value>-1</value>
      </property>
      <property>
        <name>yarn.scheduler.capacity.rack-locality-additional-delay</name>
        <value>-1</value>
      </property>
    </configuration>
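Once the ConfigMap has been applied (see the deployment commands below), it is worth confirming that all five configuration files rendered as expected:
bash
kubectl describe configmap hadoop-config -n bigdata4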
namenode.yaml
yaml
apiVersion: v1
kind: Service
metadata:
  name: namenode
  namespace: bigdata4
  labels:
    app: hadoop-namenode
spec:
  ports:
    - port: 9870
      name: web
    - port: 9000
      name: rpc
  clusterIP: None
  selector:
    app: hadoop-namenode
---
apiVersion: v1
kind: Service
metadata:
  name: namenode-external
  namespace: bigdata4
  labels:
    app: hadoop-namenode
spec:
  type: NodePort
  ports:
    - port: 9870
      name: web
      targetPort: 9870
      nodePort: 30070
    - port: 9000
      name: rpc
      targetPort: 9000
      nodePort: 30090
  selector:
    app: hadoop-namenode
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: namenode
  namespace: bigdata4
spec:
  serviceName: namenode
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-namenode
  template:
    metadata:
      labels:
        app: hadoop-namenode
    spec:
      # --- 1. Init container: handles permissions, directory setup, and on-demand formatting ---
      initContainers:
        - name: init-namenode
          image: docker.1ms.run/zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          securityContext:
            runAsUser: 0
            privileged: true
          command: ["/bin/bash", "-c"]
          args:
            - |
              set -e
              echo "Step 1: Preparing directories and permissions..."
              mkdir -p /opt/hadoop/data/nn
              # Make sure the hadoop user owns the data directory
              chown -R hadoop:hadoop /opt/hadoop/data
              echo "Step 2: Checking if NameNode needs formatting..."
              if [ ! -f /opt/hadoop/data/nn/current/VERSION ]; then
                echo "No VERSION file found. Formatting NameNode..."
                # Force-format in non-interactive mode
                su hadoop -c "hdfs namenode -format -nonInteractive"
                echo "Formatting completed successfully."
              else
                echo "NameNode already formatted (VERSION file exists). Skipping."
                grep -i "clusterID" /opt/hadoop/data/nn/current/VERSION
              fi
          volumeMounts:
            - name: hadoop-nn-data
              mountPath: /opt/hadoop/data/nn
              subPath: nn
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
      # --- 2. Main container: only runs the NameNode service ---
      containers:
        - name: namenode
          image: docker.1ms.run/zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          command: ["/bin/bash", "-c"]
          # exec makes the namenode PID 1 so it receives signals for graceful shutdown
          args: ["exec hdfs namenode"]
          env:
            - name: HADOOP_CONF_DIR
              value: "/opt/hadoop/etc/hadoop"
            - name: HADOOP_LOG_DIR
              value: "/opt/hadoop/logs"
            - name: HADOOP_HEAPSIZE
              value: "1024"
          ports:
            - containerPort: 9870
              name: web
            - containerPort: 9000
              name: rpc
          # Resource requests/limits determine the Pod's QoS class
          resources:
            requests:
              memory: "2Gi"
              cpu: "500m"
            limits:
              memory: "4Gi"
              cpu: "1000m"
          # Health checks: probe the JMX endpoint to confirm the NameNode is actually serving
          readinessProbe:
            httpGet:
              path: /jmx
              port: 9870
            initialDelaySeconds: 40
            periodSeconds: 10
          livenessProbe:
            httpGet:
              path: /jmx
              port: 9870
            initialDelaySeconds: 100
            periodSeconds: 30
          volumeMounts:
            - name: hadoop-nn-data
              mountPath: /opt/hadoop/data/nn
              subPath: nn
            - name: hadoop-logs
              mountPath: /opt/hadoop/logs
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
        - name: hadoop-logs
          emptyDir: {}
  # --- 3. Storage definition ---
  volumeClaimTemplates:
    - metadata:
        name: hadoop-nn-data
      spec:
        accessModes: [ "ReadWriteOnce" ]
        storageClassName: "local"
        resources:
          requests:
            storage: 20Gi
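The init container formats the NameNode only when no VERSION file exists, so Pod restarts and re-creations keep the existing cluster metadata. Which branch it took can be checked in its log:
bash
kubectl logs -n bigdata4 namenode-0 -c init-namenode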
nodemanager.yaml
yaml
apiVersion: v1
kind: Service
metadata:
  name: nodemanager
  namespace: bigdata4
  labels:
    app: hadoop-nodemanager
spec:
  ports:
    - port: 8042
      name: web
  clusterIP: None
  selector:
    app: hadoop-nodemanager
---
apiVersion: v1
kind: Service
metadata:
  name: nodemanager-external
  namespace: bigdata4
  labels:
    app: hadoop-nodemanager
spec:
  type: NodePort
  ports:
    - port: 8042
      name: web
      targetPort: 8042
  selector:
    app: hadoop-nodemanager
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: nodemanager
  namespace: bigdata4
spec:
  serviceName: nodemanager
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-nodemanager
  template:
    metadata:
      labels:
        app: hadoop-nodemanager
    spec:
      containers:
        - name: nodemanager
          image: docker.1ms.run/zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          command: ["yarn", "nodemanager"]
          env:
            - name: HADOOP_CONF_DIR
              value: "/opt/hadoop/etc/hadoop"
          ports:
            - containerPort: 8042
          volumeMounts:
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
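After both YARN components are deployed, NodeManager registration can be verified from inside the ResourceManager Pod:
bash
kubectl exec -n bigdata4 resourcemanager-0 -- yarn node -list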
resourcemanager.yaml
yaml
apiVersion: v1
kind: Service
metadata:
  name: resourcemanager
  namespace: bigdata4
  labels:
    app: hadoop-resourcemanager
spec:
  ports:
    - port: 8088
      name: web
    - port: 8032
      name: rpc
  clusterIP: None
  selector:
    app: hadoop-resourcemanager
---
apiVersion: v1
kind: Service
metadata:
  name: resourcemanager-external
  namespace: bigdata4
  labels:
    app: hadoop-resourcemanager
spec:
  type: NodePort
  ports:
    - port: 8088
      name: web
      targetPort: 8088
    - port: 8032
      name: rpc
      targetPort: 8032
  selector:
    app: hadoop-resourcemanager
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: resourcemanager
  namespace: bigdata4
spec:
  serviceName: resourcemanager
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-resourcemanager
  template:
    metadata:
      labels:
        app: hadoop-resourcemanager
    spec:
      containers:
        - name: resourcemanager
          image: docker.1ms.run/zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          command: ["yarn", "resourcemanager"]
          env:
            - name: HADOOP_CONF_DIR
              value: "/opt/hadoop/etc/hadoop"
          ports:
            - containerPort: 8088
            - containerPort: 8032
          volumeMounts:
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/capacity-scheduler.xml
              subPath: capacity-scheduler.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
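datanode.yaml
The deployment commands below also apply a datanode.yaml, which is not reproduced in this article. The following is a minimal sketch reconstructed from the other manifests and the kubectl output further down: ports 9864/9866 and the data directory /opt/hadoop/data/dn come from hdfs-site.xml and the service listing, while the init container, storage size, and storage class mirror the NameNode manifest and are assumptions. The external service's nodePort is left for Kubernetes to auto-assign, matching the random ports in the output below.
yaml
apiVersion: v1
kind: Service
metadata:
  name: datanode
  namespace: bigdata4
  labels:
    app: hadoop-datanode
spec:
  ports:
    - port: 9864
      name: web
    - port: 9866
      name: data
  clusterIP: None
  selector:
    app: hadoop-datanode
---
apiVersion: v1
kind: Service
metadata:
  name: datanode-external
  namespace: bigdata4
  labels:
    app: hadoop-datanode
spec:
  type: NodePort
  ports:
    - port: 9864
      name: web
      targetPort: 9864
    - port: 9866
      name: data
      targetPort: 9866
  selector:
    app: hadoop-datanode
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: datanode
  namespace: bigdata4
spec:
  serviceName: datanode
  replicas: 1
  selector:
    matchLabels:
      app: hadoop-datanode
  template:
    metadata:
      labels:
        app: hadoop-datanode
    spec:
      initContainers:
        # Assumed: fix ownership of the data directory, mirroring the NameNode init container
        - name: init-datanode
          image: docker.1ms.run/zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          securityContext:
            runAsUser: 0
          command: ["/bin/bash", "-c"]
          args:
            - |
              mkdir -p /opt/hadoop/data/dn
              chown -R hadoop:hadoop /opt/hadoop/data
          volumeMounts:
            - name: hadoop-dn-data
              mountPath: /opt/hadoop/data/dn
              subPath: dn
      containers:
        - name: datanode
          image: docker.1ms.run/zhuyifeiruichuang/hadoop:3.1.1
          imagePullPolicy: IfNotPresent
          command: ["/bin/bash", "-c"]
          # exec makes the datanode PID 1 for graceful shutdown, as in the NameNode manifest
          args: ["exec hdfs datanode"]
          env:
            - name: HADOOP_CONF_DIR
              value: "/opt/hadoop/etc/hadoop"
          ports:
            - containerPort: 9864
              name: web
            - containerPort: 9866
              name: data
          volumeMounts:
            - name: hadoop-dn-data
              mountPath: /opt/hadoop/data/dn
              subPath: dn
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: hadoop-config-volume
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
      volumes:
        - name: hadoop-config-volume
          configMap:
            name: hadoop-config
  volumeClaimTemplates:
    - metadata:
        name: hadoop-dn-data
      spec:
        accessModes: [ "ReadWriteOnce" ]
        storageClassName: "local"
        resources:
          requests:
            storage: 20Gi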
Be sure to run the deployment commands in this exact order.
bash
kubectl create namespace bigdata4
kubectl apply -f configmap.yaml
kubectl apply -f namenode.yaml
kubectl apply -f datanode.yaml
kubectl apply -f resourcemanager.yaml
kubectl apply -f nodemanager.yaml
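The StatefulSets can take a minute or two to pull images and pass their probes; each rollout can be waited on explicitly:
bash
kubectl rollout status statefulset/namenode -n bigdata4
kubectl rollout status statefulset/datanode -n bigdata4
kubectl rollout status statefulset/resourcemanager -n bigdata4
kubectl rollout status statefulset/nodemanager -n bigdata4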
Check the deployment results
bash
root@master1:/opt/hadoop# kubectl get all -n bigdata4
NAME READY STATUS RESTARTS AGE
pod/datanode-0 1/1 Running 0 36m
pod/namenode-0 1/1 Running 0 36m
pod/nodemanager-0 1/1 Running 1 (62m ago) 77m
pod/resourcemanager-0 1/1 Running 0 77m
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
service/datanode ClusterIP None <none> 9864/TCP,9866/TCP 87m
service/datanode-external NodePort 10.233.16.190 <none> 9864:31131/TCP,9866:32664/TCP 44m
service/namenode ClusterIP None <none> 9870/TCP,9000/TCP 87m
service/namenode-external NodePort 10.233.9.37 <none> 9870:30070/TCP,9000:30090/TCP 87m
service/nodemanager ClusterIP None <none> 8042/TCP 87m
service/nodemanager-external NodePort 10.233.42.117 <none> 8042:32553/TCP 87m
service/resourcemanager ClusterIP None <none> 8088/TCP,8032/TCP 87m
service/resourcemanager-external NodePort 10.233.15.123 <none> 8088:31142/TCP,8032:31569/TCP 87m
NAME READY AGE
statefulset.apps/datanode 1/1 87m
statefulset.apps/namenode 1/1 87m
statefulset.apps/nodemanager 1/1 87m
statefulset.apps/resourcemanager 1/1 87m
Check the NameNode and DataNode logs
bash
root@master1:/opt/hadoop# kubectl logs -n bigdata4 pods/namenode-0
root@master1:/opt/hadoop# kubectl logs -n bigdata4 pods/datanode-0
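If neither log shows errors, a functional smoke test confirms HDFS and YARN end to end. The example JAR path below is the standard location inside a Hadoop 3.1.1 distribution unpacked at /opt/hadoop; adjust it if the image layout differs:
bash
# List the live DataNodes registered with the NameNode
kubectl exec -n bigdata4 namenode-0 -- hdfs dfsadmin -report

# Round-trip a small file through HDFS
kubectl exec -n bigdata4 namenode-0 -- bash -c \
  'echo hello | hdfs dfs -put - /smoke.txt && hdfs dfs -cat /smoke.txt && hdfs dfs -rm /smoke.txt'

# Run a sample MapReduce job on YARN
kubectl exec -n bigdata4 resourcemanager-0 -- \
  yarn jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.1.1.jar pi 2 10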