Running Big Data Components on Kubernetes: Hadoop

Configuration Files

The four Hadoop site files are packaged into a single ConfigMap named hadoop; each deployment below mounts only the files it needs via subPath.

yaml
apiVersion: v1
kind: ConfigMap
metadata:
    name: hadoop
data:
    core-site.xml: |-
        <?xml version="1.0"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>fs.defaultFS</name>
                <value>hdfs://192.168.199.56:8020</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
            <property>
                <name>io.file.buffer.size</name>
                <value>131072</value>
            </property>
            <property>
                <name>hadoop.proxyuser.root.groups</name>
                <value>*</value>
            </property>
            <property>
                <name>hadoop.proxyuser.root.hosts</name>
                <value>*</value>
            </property>
        </configuration>
    hdfs-site.xml: |-
        <?xml version="1.0"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>dfs.webhdfs.enabled</name>
                <value>true</value>
            </property>
            <property>
                <name>dfs.datanode.use.datanode.hostname</name>
                <value>false</value>
            </property>
            <property>
                <name>dfs.replication</name>
                <value>3</value>
            </property>
            <property>
                <name>dfs.namenode.name.dir</name>
                <value>/dfs/nn</value>
            </property>
            <property>
                <name>dfs.namenode.checkpoint.dir</name>
                <value>/dfs/snn</value>
            </property>
            <property>
                <name>dfs.namenode.handler.count</name>
                <value>100</value>
            </property>
            <property>
                <name>dfs.datanode.data.dir</name>
                <value>/dfs/dn</value>
            </property>
            <property>
                <name>dfs.blocksize</name>
                <value>134217728</value>
            </property>
            <property>
                <name>dfs.namenode.rpc-bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                <name>dfs.namenode.servicerpc-bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                  <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
                  <value>false</value>
            </property>
        </configuration>
    mapred-site.xml: |-
        <?xml version="1.0"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
            </property>
            <property>
                <name>mapreduce.jobhistory.address</name>
                <value>192.168.199.58:10020</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
            <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>192.168.199.58:19888</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
        </configuration>
    yarn-site.xml: |-
        <configuration>
        <!-- Site specific YARN configuration properties -->
            <property>
                <name>yarn.resourcemanager.hostname</name>
                <value>192.168.199.56</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
            <property>
                <name>yarn.nodemanager.vmem-check-enabled</name>
                <value>false</value>
            </property>
            <property>
                <name>yarn.nodemanager.pmem-check-enabled</name>
                <value>false</value>
            </property>
            <property>
                <name>yarn.scheduler.minimum-allocation-mb</name>
                <value>1024</value>
            </property>
            <property>
                <name>yarn.scheduler.maximum-allocation-mb</name>
                <value>2048</value>
            </property>
            <property>
                <name>yarn.nodemanager.resource.memory-mb</name>
                <value>2048</value>
            </property>
            <property>
                <name>yarn.log-aggregation-enable</name>
                <value>true</value>
            </property>
            <property>
                <name>yarn.log.server.url</name>
                <value>http://192.168.199.58:19888/jobhistory/logs</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
            <property>
                <name>yarn.log-aggregation.retain-seconds</name>
                <value>604800</value>
            </property>
            <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
            </property>
            <property>
                <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
                <value>org.apache.hadoop.mapred.ShuffleHandler</value>
            </property>
            <property>
                <name>yarn.resourcemanager.bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                <name>yarn.nodemanager.bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                <name>yarn.timeline-service.bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                <name>yarn.application.classpath</name>
                <value>
                /opt/hadoop/etc/hadoop,
                /opt/hadoop/share/hadoop/common/*,
                /opt/hadoop/share/hadoop/common/lib/*,
                /opt/hadoop/share/hadoop/hdfs/*,
                /opt/hadoop/share/hadoop/hdfs/lib/*,
                /opt/hadoop/share/hadoop/mapreduce/*,
                /opt/hadoop/share/hadoop/mapreduce/lib/*,
                /opt/hadoop/share/hadoop/yarn/*,
                /opt/hadoop/share/hadoop/yarn/lib/*
                </value>
            </property>
            <property>
                <description>List of directories to store localized files in.</description>
                <name>yarn.nodemanager.local-dirs</name>
                <value>/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
            </property>
            <property>
                <description>Where to store container logs.</description>
                <name>yarn.nodemanager.log-dirs</name>
                <value>/opt/hadoop/logs/yarn/containers</value>
            </property>
            <property>
                <description>Where to aggregate logs to.</description>
                <name>yarn.nodemanager.remote-app-log-dir</name>
                <value>/opt/hadoop/logs/yarn/apps</value>
            </property>
            <property>
                <name>yarn.resourcemanager.scheduler.class</name>
                <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
            </property>
            <property>
                <name>yarn.scheduler.fair.preemption</name>
                <value>true</value>
            </property>
            <property>
                <name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
                <value>1.0</value>
            </property>
        </configuration>
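
With the ConfigMap in place, apply it and confirm all four site files were loaded before starting any of the workloads. A minimal sketch, assuming the manifest above is saved as hadoop-configmap.yaml (an illustrative file name):

bash

# Create (or update) the ConfigMap in the current namespace
kubectl apply -f hadoop-configmap.yaml
# core-site.xml, hdfs-site.xml, mapred-site.xml and yarn-site.xml should all appear as keys
kubectl describe configmap hadoop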

Deploying HDFS

Deploying the namenode

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: namenode
  labels:
    app: namenode
spec:
  selector:
    matchLabels:
      app: namenode
  replicas: 1
  template:
    metadata:
      labels:
        app: namenode
    spec:
      initContainers:
        - name: dfs-init	# an initContainer checks whether the namenode needs formatting
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          env:
            - name: HADOOP_LIBEXEC_DIR	# if this env var was not baked into the image, set it here; it overrides the image's value
              value: /opt/hadoop/libexec
          command:	# format the namenode only on first start
            - "sh"
            - "-c"
            - "if [ ! -d /dfs/nn ];then mkdir /dfs/nn && hdfs namenode -format;fi"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs	# mount the data directory at /dfs inside the pod
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml	# mount the config file
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
      containers:
        - name: namenode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:	# size according to the capacity plan
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:		# namenode startup command; it stays in the foreground by logging to stdout, to be improved
            - "sh"
            - "-c"
            - "hdfs namenode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime		# mount the host's Asia/Shanghai timezone
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
          lifecycle:
            preStop:	 # gracefully stop the service before the pod is destroyed
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop namenode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:		# mount the node's storage directory
            path: /dfs
        - name: config		# mount the config files
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true		 # use the host's network namespace
      hostAliases:		# hostname resolution for the pod, appended to /etc/hosts automatically
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:		# schedule onto the node labeled for this role
        namenode: "true"
      tolerations:		# tolerate the node taint; otherwise the pod cannot be scheduled there
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

Deploying the secondarynamenode

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: secondarynamenode
  labels:
    app: secondarynamenode
spec:
  selector:
    matchLabels:
      app: secondarynamenode
  replicas: 1
  template:
    metadata:
      labels:
        app: secondarynamenode
    spec:
      containers:
        - name: secondarynamenode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "hdfs secondarynamenode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop secondarynamenode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:
            path: /dfs
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        secondarynamenode: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

Deploying the datanodes

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datanode
  labels:
    app: datanode
spec:
  selector:
    matchLabels:
      app: datanode
  replicas: 3	# adjust to the planned number of nodes
  template:
    metadata:
      labels:
        app: datanode
    spec:
      containers:
        - name: datanode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 500m
              memory: 1Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "hdfs datanode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop datanode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:
            path: /dfs
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        datanode: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"
      affinity:
        podAntiAffinity:	# anti-affinity: never schedule two datanode pods onto the same node
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: datanode
              topologyKey: kubernetes.io/hostname
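
After the three datanode pods start, each should register with the namenode. Since dfs.replication is set to 3 above, all three need to be live before writes replicate fully; hdfs dfsadmin -report lists them. A sketch, with illustrative node and file names:

bash

# Label the datanode hosts, then deploy
for n in bigdata199056 bigdata199057 bigdata199058; do kubectl label node "$n" datanode=true; done
kubectl apply -f datanode.yaml
# All three datanodes should be listed as live
kubectl exec deploy/namenode -- hdfs dfsadmin -report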

Deploying YARN

Deploying the resourcemanager

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: resourcemanager
  labels:
    app: resourcemanager
spec:
  selector:
    matchLabels:
      app: resourcemanager
  replicas: 1
  template:
    metadata:
      labels:
        app: resourcemanager
    spec:
      containers:
        - name: resourcemanager
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "yarn resourcemanager"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "yarn --daemon stop resourcemanager"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        resourcemanager: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

Deploying the nodemanagers

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nodemanager
  labels:
    app: nodemanager
spec:
  selector:
    matchLabels:
      app: nodemanager
  replicas: 3
  template:
    metadata:
      labels:
        app: nodemanager
    spec:
      containers:
        - name: nodemanager
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 4000m
              memory: 8Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "yarn nodemanager"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "yarn --daemon stop nodemanager"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        nodemanager: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: nodemanager
              topologyKey: kubernetes.io/hostname
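
Once the nodemanagers start they register with the resourcemanager, so yarn node -list should report three RUNNING nodes. A sketch, run from inside the resourcemanager pod (node and file names are illustrative):

bash

for n in bigdata199056 bigdata199057 bigdata199058; do kubectl label node "$n" nodemanager=true; done
kubectl apply -f nodemanager.yaml
# Three nodes should be reported as RUNNING
kubectl exec deploy/resourcemanager -- yarn node -list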

Deploying the historyserver

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: historyserver
  labels:
    app: historyserver
spec:
  selector:
    matchLabels:
      app: historyserver
  replicas: 1
  template:
    metadata:
      labels:
        app: historyserver
    spec:
      containers:
        - name: historyserver
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 500m
              memory: 1Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "mapred historyserver"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "mapred --daemon stop historyserver"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:
            path: /dfs
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        historyserver: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

Checking that the Hadoop components are running
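
With all six deployments applied, every pod should be Running and pinned to its labeled node. A quick way to check (a sketch):

bash

kubectl get deployments
kubectl get pods -o wide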
