Configuration files
yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop
data:
  core-site.xml: |-
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://192.168.199.56:8020</value>
        <description>Fill in the IP of the node assigned this role</description>
      </property>
      <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
      </property>
      <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
      </property>
    </configuration>
  hdfs-site.xml: |-
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.datanode.use.datanode.hostname</name>
        <value>false</value>
      </property>
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>/dfs/nn</value>
      </property>
      <property>
        <name>dfs.namenode.checkpoint.dir</name>
        <value>/dfs/snn</value>
      </property>
      <property>
        <name>dfs.namenode.handler.count</name>
        <value>100</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>/dfs/dn</value>
      </property>
      <property>
        <name>dfs.blocksize</name>
        <value>134217728</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
        <value>false</value>
      </property>
    </configuration>
  mapred-site.xml: |-
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
      </property>
      <property>
        <name>mapreduce.jobhistory.address</name>
        <value>192.168.199.58:10020</value>
        <description>Fill in the IP of the node assigned this role</description>
      </property>
      <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>192.168.199.58:19888</value>
        <description>Fill in the IP of the node assigned this role</description>
      </property>
    </configuration>
  yarn-site.xml: |-
    <configuration>
      <!-- Site specific YARN configuration properties -->
      <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>192.168.199.56</value>
        <description>Fill in the IP of the node assigned this role</description>
      </property>
      <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
      </property>
      <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
      </property>
      <property>
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>1024</value>
      </property>
      <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>2048</value>
      </property>
      <property>
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>2048</value>
      </property>
      <property>
        <name>yarn.log-aggregation-enable</name>
        <value>true</value>
      </property>
      <property>
        <name>yarn.log.server.url</name>
        <value>http://192.168.199.58:19888/jobhistory/logs</value>
        <description>Fill in the IP of the node assigned this role</description>
      </property>
      <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>604800</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
      </property>
      <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
      </property>
      <property>
        <name>yarn.resourcemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.nodemanager.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.timeline-service.bind-host</name>
        <value>0.0.0.0</value>
      </property>
      <property>
        <name>yarn.application.classpath</name>
        <value>
          /opt/hadoop/etc/hadoop,
          /opt/hadoop/share/hadoop/common/*,
          /opt/hadoop/share/hadoop/common/lib/*,
          /opt/hadoop/share/hadoop/hdfs/*,
          /opt/hadoop/share/hadoop/hdfs/lib/*,
          /opt/hadoop/share/hadoop/mapreduce/*,
          /opt/hadoop/share/hadoop/mapreduce/lib/*,
          /opt/hadoop/share/hadoop/yarn/*,
          /opt/hadoop/share/hadoop/yarn/lib/*
        </value>
      </property>
      <property>
        <description>List of directories to store localized files in.</description>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
      </property>
      <property>
        <description>Where to store container logs.</description>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/opt/hadoop/logs/yarn/containers</value>
      </property>
      <property>
        <description>Where to aggregate logs to.</description>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/opt/hadoop/logs/yarn/apps</value>
      </property>
      <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
      </property>
      <property>
        <name>yarn.scheduler.fair.preemption</name>
        <value>true</value>
      </property>
      <property>
        <name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
        <value>1.0</value>
      </property>
    </configuration>
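Before deploying any of the workloads below, adjust the IP addresses in the ConfigMap to match your own nodes, then apply it to the cluster so every Hadoop pod can mount the site files. A minimal sketch, assuming the manifest above is saved as hadoop-configmap.yaml (the file name is only an example) and kubectl already points at the target cluster:
bash
# create or update the ConfigMap that all Hadoop pods mount
kubectl apply -f hadoop-configmap.yaml

# confirm that the four site files are present as data keys
kubectl describe configmap hadoop | grep -E 'core-site|hdfs-site|mapred-site|yarn-site'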
Deploy HDFS
Deploy the namenode
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: namenode
  labels:
    app: namenode
spec:
  selector:
    matchLabels:
      app: namenode
  replicas: 1
  template:
    metadata:
      labels:
        app: namenode
    spec:
      initContainers:
      - name: dfs-init # use an initContainer to check whether the namenode still needs to be formatted
        image: hadoop:2.10.1
        imagePullPolicy: IfNotPresent
        env:
        - name: HADOOP_LIBEXEC_DIR # if this variable was not baked into the image, set it here; it overrides the image's value
          value: /opt/hadoop/libexec
        command: # format the namenode only if it has not been initialized yet
        - "sh"
        - "-c"
        - "if [ ! -d /dfs/nn ];then mkdir /dfs/nn && hdfs namenode -format;fi"
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime
        - name: dfs
          mountPath: /dfs # mount the data directory at /dfs inside the pod
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/core-site.xml # mount the configuration files
          subPath: core-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
          subPath: hdfs-site.xml
      containers:
      - name: namenode
        image: hadoop:2.10.1
        imagePullPolicy: IfNotPresent
        resources: # size resources according to your capacity plan
          limits:
            cpu: 1000m
            memory: 2Gi
        env:
        - name: HADOOP_LIBEXEC_DIR
          value: /opt/hadoop/libexec
        command: # namenode startup command; for now it stays in the foreground by writing logs to stdout, to be improved
        - "sh"
        - "-c"
        - "hdfs namenode"
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime # mount the host's Asia/Shanghai timezone
        - name: dfs
          mountPath: /dfs
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/core-site.xml
          subPath: core-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
          subPath: hdfs-site.xml
        lifecycle:
          preStop: # stop the service gracefully before the pod is destroyed
            exec:
              command:
              - "sh"
              - "-c"
              - "hdfs --daemon stop namenode"
      volumes:
      - name: localtime
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
      - name: dfs
        hostPath: # mount the node's storage directory
          path: /dfs
      - name: config # configuration files from the ConfigMap
        configMap:
          name: hadoop
      restartPolicy: Always
      hostNetwork: true # use the host's network namespace
      hostAliases: # hostname resolution for the pod; entries are added to /etc/hosts automatically
      - ip: "192.168.199.56"
        hostnames:
        - "bigdata199056"
      - ip: "192.168.199.57"
        hostnames:
        - "bigdata199057"
      - ip: "192.168.199.58"
        hostnames:
        - "bigdata199058"
      nodeSelector: # schedule onto the node assigned this role
        namenode: "true"
      tolerations: # tolerate the node taint, otherwise the pod cannot be scheduled onto it
      - key: "bigdata"
        value: "true"
        operator: "Equal"
        effect: "NoSchedule"
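The nodeSelector and tolerations above only match if the target node carries the corresponding label and taint, and the namenode also needs its /dfs hostPath directory to exist on that node. A preparation and startup-check sketch, assuming the Kubernetes node names equal the hostnames used in hostAliases (bigdata199056 is the namenode node in this plan) and the Hadoop 2.x default NameNode HTTP port 50070:
bash
# label and taint the node that should run the namenode
kubectl label node bigdata199056 namenode=true
kubectl taint node bigdata199056 bigdata=true:NoSchedule

# create the hostPath data directory (run this on bigdata199056 itself)
mkdir -p /dfs

# after applying the Deployment, check the pod and the NameNode web UI
kubectl get pod -l app=namenode -o wide
curl -s http://192.168.199.56:50070 >/dev/null && echo "namenode web UI is up"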
Deploy the secondarynamenode
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: secondarynamenode
  labels:
    app: secondarynamenode
spec:
  selector:
    matchLabels:
      app: secondarynamenode
  replicas: 1
  template:
    metadata:
      labels:
        app: secondarynamenode
    spec:
      containers:
      - name: secondarynamenode
        image: hadoop:2.10.1
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            cpu: 1000m
            memory: 2Gi
        env:
        - name: HADOOP_LIBEXEC_DIR
          value: /opt/hadoop/libexec
        command:
        - "sh"
        - "-c"
        - "hdfs secondarynamenode"
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime
        - name: dfs
          mountPath: /dfs
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/core-site.xml
          subPath: core-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
          subPath: hdfs-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
          subPath: yarn-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
          subPath: mapred-site.xml
        lifecycle:
          preStop:
            exec:
              command:
              - "sh"
              - "-c"
              - "hdfs --daemon stop secondarynamenode"
      volumes:
      - name: localtime
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
      - name: dfs
        hostPath:
          path: /dfs
      - name: config
        configMap:
          name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
      - ip: "192.168.199.56"
        hostnames:
        - "bigdata199056"
      - ip: "192.168.199.57"
        hostnames:
        - "bigdata199057"
      - ip: "192.168.199.58"
        hostnames:
        - "bigdata199058"
      nodeSelector:
        secondarynamenode: "true"
      tolerations:
      - key: "bigdata"
        value: "true"
        operator: "Equal"
        effect: "NoSchedule"
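The same node preparation applies here: label the node chosen for the secondarynamenode and make sure /dfs exists on it. A quick check sketch, assuming bigdata199057 is the node picked for this role (adjust to your plan) and the Hadoop 2.x default secondary namenode HTTP port 50090:
bash
# label the node that should run the secondarynamenode
kubectl label node bigdata199057 secondarynamenode=true

# verify the pod and the checkpoint web UI
kubectl get pod -l app=secondarynamenode -o wide
curl -s http://192.168.199.57:50090 >/dev/null && echo "secondarynamenode web UI is up"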
Deploy the datanode
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datanode
  labels:
    app: datanode
spec:
  selector:
    matchLabels:
      app: datanode
  replicas: 3 # adjust to the planned number of datanode nodes
  template:
    metadata:
      labels:
        app: datanode
    spec:
      containers:
      - name: datanode
        image: hadoop:2.10.1
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            cpu: 500m
            memory: 1Gi
        env:
        - name: HADOOP_LIBEXEC_DIR
          value: /opt/hadoop/libexec
        command:
        - "sh"
        - "-c"
        - "hdfs datanode"
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime
        - name: dfs
          mountPath: /dfs
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/core-site.xml
          subPath: core-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
          subPath: hdfs-site.xml
        lifecycle:
          preStop:
            exec:
              command:
              - "sh"
              - "-c"
              - "hdfs --daemon stop datanode"
      volumes:
      - name: localtime
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
      - name: dfs
        hostPath:
          path: /dfs
      - name: config
        configMap:
          name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
      - ip: "192.168.199.56"
        hostnames:
        - "bigdata199056"
      - ip: "192.168.199.57"
        hostnames:
        - "bigdata199057"
      - ip: "192.168.199.58"
        hostnames:
        - "bigdata199058"
      nodeSelector:
        datanode: "true"
      tolerations:
      - key: "bigdata"
        value: "true"
        operator: "Equal"
        effect: "NoSchedule"
      affinity:
        podAntiAffinity: # pod anti-affinity: never schedule two datanode pods onto the same node
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchLabels:
                app: datanode
            topologyKey: kubernetes.io/hostname
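Once the three datanode pods are running, they register with the namenode over the host network. A verification sketch, assuming all three nodes host datanodes and a kubectl version that accepts deployment/name with exec (otherwise substitute the namenode pod name):
bash
# label every node that should host a datanode
kubectl label node bigdata199056 bigdata199057 bigdata199058 datanode=true

# the anti-affinity rule should place the three pods on three different nodes
kubectl get pod -l app=datanode -o wide

# the HDFS report should list three live datanodes
kubectl exec deploy/namenode -- hdfs dfsadmin -report | grep -A1 'Live datanodes'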
Deploy YARN
Deploy the resourcemanager
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: resourcemanager
  labels:
    app: resourcemanager
spec:
  selector:
    matchLabels:
      app: resourcemanager
  replicas: 1
  template:
    metadata:
      labels:
        app: resourcemanager
    spec:
      containers:
      - name: resourcemanager
        image: hadoop:2.10.1
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            cpu: 1000m
            memory: 2Gi
        env:
        - name: HADOOP_LIBEXEC_DIR
          value: /opt/hadoop/libexec
        command:
        - "sh"
        - "-c"
        - "yarn resourcemanager"
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/core-site.xml
          subPath: core-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
          subPath: hdfs-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
          subPath: yarn-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
          subPath: mapred-site.xml
        lifecycle:
          preStop:
            exec:
              command:
              - "sh"
              - "-c"
              - "yarn --daemon stop resourcemanager"
      volumes:
      - name: localtime
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
      - name: config
        configMap:
          name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
      - ip: "192.168.199.56"
        hostnames:
        - "bigdata199056"
      - ip: "192.168.199.57"
        hostnames:
        - "bigdata199057"
      - ip: "192.168.199.58"
        hostnames:
        - "bigdata199058"
      nodeSelector:
        resourcemanager: "true"
      tolerations:
      - key: "bigdata"
        value: "true"
        operator: "Equal"
        effect: "NoSchedule"
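yarn-site.xml pins yarn.resourcemanager.hostname to 192.168.199.56, so the resourcemanager must land on that node and, with host networking, its web UI answers on the default YARN port 8088. A check sketch, assuming that node is bigdata199056:
bash
# label the node that should run the resourcemanager
kubectl label node bigdata199056 resourcemanager=true

# verify the pod and the ResourceManager web UI
kubectl get pod -l app=resourcemanager -o wide
curl -s http://192.168.199.56:8088/cluster >/dev/null && echo "resourcemanager web UI is up"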
Deploy the nodemanager
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nodemanager
  labels:
    app: nodemanager
spec:
  selector:
    matchLabels:
      app: nodemanager
  replicas: 3
  template:
    metadata:
      labels:
        app: nodemanager
    spec:
      containers:
      - name: nodemanager
        image: hadoop:2.10.1
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            cpu: 4000m
            memory: 8Gi
        env:
        - name: HADOOP_LIBEXEC_DIR
          value: /opt/hadoop/libexec
        command:
        - "sh"
        - "-c"
        - "yarn nodemanager"
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/core-site.xml
          subPath: core-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
          subPath: hdfs-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
          subPath: yarn-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
          subPath: mapred-site.xml
        lifecycle:
          preStop:
            exec:
              command:
              - "sh"
              - "-c"
              - "yarn --daemon stop nodemanager"
      volumes:
      - name: localtime
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
      - name: config
        configMap:
          name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
      - ip: "192.168.199.56"
        hostnames:
        - "bigdata199056"
      - ip: "192.168.199.57"
        hostnames:
        - "bigdata199057"
      - ip: "192.168.199.58"
        hostnames:
        - "bigdata199058"
      nodeSelector:
        nodemanager: "true"
      tolerations:
      - key: "bigdata"
        value: "true"
        operator: "Equal"
        effect: "NoSchedule"
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
          - labelSelector:
              matchLabels:
                app: nodemanager
            topologyKey: kubernetes.io/hostname
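Each nodemanager advertises the capacity set by yarn.nodemanager.resource.memory-mb (2048 MB here), so three pods give the cluster roughly 6 GB of schedulable memory. A sketch for checking that they all registered with the resourcemanager, again assuming kubectl exec accepts deployment/name:
bash
# label every node that should host a nodemanager
kubectl label node bigdata199056 bigdata199057 bigdata199058 nodemanager=true

# all three nodemanagers should be listed as RUNNING
kubectl exec deploy/resourcemanager -- yarn node -list -all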
Deploy the historyserver
yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: historyserver
  labels:
    app: historyserver
spec:
  selector:
    matchLabels:
      app: historyserver
  replicas: 1
  template:
    metadata:
      labels:
        app: historyserver
    spec:
      containers:
      - name: historyserver
        image: hadoop:2.10.1
        imagePullPolicy: IfNotPresent
        resources:
          limits:
            cpu: 500m
            memory: 1Gi
        env:
        - name: HADOOP_LIBEXEC_DIR
          value: /opt/hadoop/libexec
        command:
        - "sh"
        - "-c"
        - "mapred historyserver"
        volumeMounts:
        - name: localtime
          mountPath: /etc/localtime
        - name: dfs
          mountPath: /dfs
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/core-site.xml
          subPath: core-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
          subPath: hdfs-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
          subPath: yarn-site.xml
        - name: config
          mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
          subPath: mapred-site.xml
        lifecycle:
          preStop:
            exec:
              command:
              - "sh"
              - "-c"
              - "mapred --daemon stop historyserver"
      volumes:
      - name: localtime
        hostPath:
          path: /usr/share/zoneinfo/Asia/Shanghai
      - name: dfs
        hostPath:
          path: /dfs
      - name: config
        configMap:
          name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
      - ip: "192.168.199.56"
        hostnames:
        - "bigdata199056"
      - ip: "192.168.199.57"
        hostnames:
        - "bigdata199057"
      - ip: "192.168.199.58"
        hostnames:
        - "bigdata199058"
      nodeSelector:
        historyserver: "true"
      tolerations:
      - key: "bigdata"
        value: "true"
        operator: "Equal"
        effect: "NoSchedule"
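With HDFS, YARN and the history server all running, an end-to-end smoke test is to submit a small MapReduce job and confirm it shows up in the job history web UI configured above (192.168.199.58:19888). A sketch, assuming the examples jar sits at the standard tarball location inside the hadoop:2.10.1 image and that kubectl exec accepts deployment/name:
bash
# submit a tiny pi-estimation job through the resourcemanager pod
kubectl exec deploy/resourcemanager -- \
  yarn jar /opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.10.1.jar pi 2 10

# the finished job should be listed by the job history server REST API
curl -s http://192.168.199.58:19888/ws/v1/history/mapreduce/jobs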