Kubernetes运行大数据组件-运行hadoop

配置文件

yaml 复制代码
apiVersion: v1
kind: ConfigMap
metadata:
  name: hadoop
data:
  # Cluster-wide client settings shared by every Hadoop component.
  core-site.xml: |-
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>fs.defaultFS</name>
            <value>hdfs://192.168.199.56:8020</value>
            <description>根据角色定义的节点填写对应的IP</description>
        </property>
        <property>
            <name>io.file.buffer.size</name>
            <value>131072</value>
        </property>
        <property>
            <name>hadoop.proxyuser.root.groups</name>
            <value>*</value>
        </property>
        <property>
            <name>hadoop.proxyuser.root.hosts</name>
            <value>*</value>
        </property>
    </configuration>
  # HDFS daemon settings (namenode / secondarynamenode / datanode).
  hdfs-site.xml: |-
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>dfs.webhdfs.enabled</name>
            <value>true</value>
        </property>
        <property>
            <name>dfs.datanode.use.datanode.hostname</name>
            <value>false</value>
        </property>
        <property>
            <name>dfs.replication</name>
            <value>3</value>
        </property>
        <property>
            <name>dfs.namenode.name.dir</name>
            <value>/dfs/nn</value>
        </property>
        <property>
            <name>dfs.namenode.checkpoint.dir</name>
            <value>/dfs/snn</value>
        </property>
        <property>
            <name>dfs.namenode.handler.count</name>
            <value>100</value>
        </property>
        <property>
            <name>dfs.datanode.data.dir</name>
            <value>/dfs/dn</value>
        </property>
        <property>
            <name>dfs.blocksize</name>
            <value>134217728</value>
        </property>
        <property>
            <name>dfs.namenode.rpc-bind-host</name>
            <value>0.0.0.0</value>
        </property>
        <property>
            <name>dfs.namenode.servicerpc-bind-host</name>
            <value>0.0.0.0</value>
        </property>
        <property>
            <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
            <value>false</value>
        </property>
    </configuration>
  # MapReduce framework and job-history settings.
  mapred-site.xml: |-
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
        <property>
            <name>mapreduce.framework.name</name>
            <value>yarn</value>
        </property>
        <property>
            <name>mapreduce.jobhistory.address</name>
            <value>192.168.199.58:10020</value>
            <description>根据角色定义的节点填写对应的IP</description>
        </property>
        <property>
            <name>mapreduce.jobhistory.webapp.address</name>
            <value>192.168.199.58:19888</value>
            <description>根据角色定义的节点填写对应的IP</description>
        </property>
    </configuration>
  # YARN resourcemanager / nodemanager settings.
  yarn-site.xml: |-
    <?xml version="1.0"?>
    <configuration>
    <!-- Site specific YARN configuration properties -->
        <property>
            <name>yarn.resourcemanager.hostname</name>
            <value>192.168.199.56</value>
            <description>根据角色定义的节点填写对应的IP</description>
        </property>
        <property>
            <name>yarn.nodemanager.vmem-check-enabled</name>
            <value>false</value>
        </property>
        <property>
            <name>yarn.nodemanager.pmem-check-enabled</name>
            <value>false</value>
        </property>
        <property>
            <name>yarn.scheduler.minimum-allocation-mb</name>
            <value>1024</value>
        </property>
        <property>
            <name>yarn.scheduler.maximum-allocation-mb</name>
            <value>2048</value>
        </property>
        <property>
            <name>yarn.nodemanager.resource.memory-mb</name>
            <value>2048</value>
        </property>
        <property>
            <name>yarn.log-aggregation-enable</name>
            <value>true</value>
        </property>
        <property>
            <name>yarn.log.server.url</name>
            <value>http://192.168.199.58:19888/jobhistory/logs</value>
            <description>根据角色定义的节点填写对应的IP</description>
        </property>
        <property>
            <name>yarn.log-aggregation.retain-seconds</name>
            <value>604800</value>
        </property>
        <property>
            <name>yarn.nodemanager.aux-services</name>
            <value>mapreduce_shuffle</value>
        </property>
        <property>
            <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
            <value>org.apache.hadoop.mapred.ShuffleHandler</value>
        </property>
        <property>
            <name>yarn.resourcemanager.bind-host</name>
            <value>0.0.0.0</value>
        </property>
        <property>
            <name>yarn.nodemanager.bind-host</name>
            <value>0.0.0.0</value>
        </property>
        <property>
            <name>yarn.timeline-service.bind-host</name>
            <value>0.0.0.0</value>
        </property>
        <property>
            <name>yarn.application.classpath</name>
            <value>
            /opt/hadoop/etc/hadoop,
            /opt/hadoop/share/hadoop/common/*,
            /opt/hadoop/share/hadoop/common/lib/*,
            /opt/hadoop/share/hadoop/hdfs/*,
            /opt/hadoop/share/hadoop/hdfs/lib/*,
            /opt/hadoop/share/hadoop/mapreduce/*,
            /opt/hadoop/share/hadoop/mapreduce/lib/*,
            /opt/hadoop/share/hadoop/yarn/*,
            /opt/hadoop/share/hadoop/yarn/lib/*
            </value>
        </property>
        <property>
            <description>List of directories to store localized files in.</description>
            <name>yarn.nodemanager.local-dirs</name>
            <value>/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
        </property>
        <property>
            <description>Where to store container logs.</description>
            <name>yarn.nodemanager.log-dirs</name>
            <value>/opt/hadoop/logs/yarn/containers</value>
        </property>
        <property>
            <description>Where to aggregate logs to.</description>
            <name>yarn.nodemanager.remote-app-log-dir</name>
            <value>/opt/hadoop/logs/yarn/apps</value>
        </property>
        <property>
            <name>yarn.resourcemanager.scheduler.class</name>
            <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
        </property>
        <property>
            <name>yarn.scheduler.fair.preemption</name>
            <value>true</value>
        </property>
        <property>
            <name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
            <value>1.0</value>
        </property>
    </configuration>

部署HDFS

部署namenode

yaml 复制代码
apiVersion: apps/v1
kind: Deployment
metadata:
  name: namenode
  labels:
    app: namenode
spec:
  selector:
    matchLabels:
      app: namenode
  replicas: 1
  strategy:
    # hostNetwork pins the RPC/HTTP ports on the node, so a RollingUpdate
    # surge pod could never bind them; kill the old pod before starting the new one.
    type: Recreate
  template:
    metadata:
      labels:
        app: namenode
    spec:
      initContainers:
        - name: dfs-init  # decide via an initContainer whether the namenode needs formatting
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          env:
            - name: HADOOP_LIBEXEC_DIR  # set here if not baked into the image; overrides the image env var
              value: /opt/hadoop/libexec
          command:  # format only on first start, when the metadata dir does not exist yet
            - "sh"
            - "-c"
            - "if [ ! -d /dfs/nn ];then mkdir /dfs/nn && hdfs namenode -format;fi"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs  # node-local data directory mounted at /dfs in the pod
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml  # mount individual config files
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
      containers:
        - name: namenode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:  # size according to the capacity plan
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:  # foreground namenode; currently kept in the foreground by log output, to be improved
            - "sh"
            - "-c"
            - "hdfs namenode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime  # host Asia/Shanghai timezone
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
          lifecycle:
            preStop:  # stop the daemon gracefully before the pod is destroyed
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop namenode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:  # node-local storage directory
            path: /dfs
        - name: config  # config files come from the hadoop ConfigMap
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true  # run in the host network namespace
      hostAliases:  # static name resolution, appended to the pod's /etc/hosts
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:  # pin to the node labelled for this role
        namenode: "true"
      tolerations:  # tolerate the bigdata taint, otherwise the pod cannot be scheduled
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

部署secondarynamenode

yaml 复制代码
apiVersion: apps/v1
kind: Deployment
metadata:
  name: secondarynamenode
  labels:
    app: secondarynamenode
spec:
  selector:
    matchLabels:
      app: secondarynamenode
  replicas: 1
  strategy:
    # hostNetwork pins the daemon ports on the node, so a RollingUpdate
    # surge pod could never bind them; replace the pod in place instead.
    type: Recreate
  template:
    metadata:
      labels:
        app: secondarynamenode
    spec:
      containers:
        - name: secondarynamenode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:  # size according to the capacity plan
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR  # set here if not baked into the image
              value: /opt/hadoop/libexec
          command:  # foreground secondarynamenode
            - "sh"
            - "-c"
            - "hdfs secondarynamenode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime  # host Asia/Shanghai timezone
            - name: dfs
              mountPath: /dfs  # node-local checkpoint directory (dfs.namenode.checkpoint.dir = /dfs/snn)
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:  # stop the daemon gracefully before the pod is destroyed
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop secondarynamenode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:  # node-local storage directory
            path: /dfs
        - name: config  # config files come from the hadoop ConfigMap
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true  # run in the host network namespace
      hostAliases:  # static name resolution, appended to the pod's /etc/hosts
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:  # pin to the node labelled for this role
        secondarynamenode: "true"
      tolerations:  # tolerate the bigdata taint, otherwise the pod cannot be scheduled
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

部署datanode

yaml 复制代码
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datanode
  labels:
    app: datanode
spec:
  selector:
    matchLabels:
      app: datanode
  replicas: 3  # adjust to the number of planned datanode nodes
  strategy:
    # Required anti-affinity means every labelled node already runs a datanode,
    # so a RollingUpdate surge pod could never schedule; replace pods in place.
    type: Recreate
  template:
    metadata:
      labels:
        app: datanode
    spec:
      containers:
        - name: datanode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:  # size according to the capacity plan
            limits:
              cpu: 500m
              memory: 1Gi
          env:
            - name: HADOOP_LIBEXEC_DIR  # set here if not baked into the image
              value: /opt/hadoop/libexec
          command:  # foreground datanode
            - "sh"
            - "-c"
            - "hdfs datanode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime  # host Asia/Shanghai timezone
            - name: dfs
              mountPath: /dfs  # node-local block storage (dfs.datanode.data.dir = /dfs/dn)
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
          lifecycle:
            preStop:  # stop the daemon gracefully before the pod is destroyed
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop datanode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:  # node-local storage directory
            path: /dfs
        - name: config  # config files come from the hadoop ConfigMap
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true  # run in the host network namespace
      hostAliases:  # static name resolution, appended to the pod's /etc/hosts
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:  # pin to nodes labelled for this role
        datanode: "true"
      tolerations:  # tolerate the bigdata taint, otherwise the pods cannot be scheduled
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"
      affinity:
        podAntiAffinity:  # never co-locate two datanode pods on the same node
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: datanode
              topologyKey: kubernetes.io/hostname

部署YARN

部署resourcemanager

yaml 复制代码
apiVersion: apps/v1
kind: Deployment
metadata:
  name: resourcemanager
  labels:
    app: resourcemanager
spec:
  selector:
    matchLabels:
      app: resourcemanager
  replicas: 1
  strategy:
    # hostNetwork pins the scheduler/web ports on the node, so a RollingUpdate
    # surge pod could never bind them; replace the pod in place instead.
    type: Recreate
  template:
    metadata:
      labels:
        app: resourcemanager
    spec:
      containers:
        - name: resourcemanager
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:  # size according to the capacity plan
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR  # set here if not baked into the image
              value: /opt/hadoop/libexec
          command:  # foreground resourcemanager
            - "sh"
            - "-c"
            - "yarn resourcemanager"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime  # host Asia/Shanghai timezone
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:  # stop the daemon gracefully before the pod is destroyed
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "yarn --daemon stop resourcemanager"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: config  # config files come from the hadoop ConfigMap
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true  # run in the host network namespace
      hostAliases:  # static name resolution, appended to the pod's /etc/hosts
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:  # pin to the node labelled for this role
        resourcemanager: "true"
      tolerations:  # tolerate the bigdata taint, otherwise the pod cannot be scheduled
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

部署nodemanager

yaml 复制代码
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nodemanager
  labels:
    app: nodemanager
spec:
  selector:
    matchLabels:
      app: nodemanager
  replicas: 3  # adjust to the number of planned nodemanager nodes
  strategy:
    # Required anti-affinity means every labelled node already runs a nodemanager,
    # so a RollingUpdate surge pod could never schedule; replace pods in place.
    type: Recreate
  template:
    metadata:
      labels:
        app: nodemanager
    spec:
      containers:
        - name: nodemanager
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:  # size according to the capacity plan
            limits:
              cpu: 4000m
              memory: 8Gi
          env:
            - name: HADOOP_LIBEXEC_DIR  # set here if not baked into the image
              value: /opt/hadoop/libexec
          command:  # foreground nodemanager
            - "sh"
            - "-c"
            - "yarn nodemanager"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime  # host Asia/Shanghai timezone
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:  # stop the daemon gracefully before the pod is destroyed
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "yarn --daemon stop nodemanager"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: config  # config files come from the hadoop ConfigMap
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true  # run in the host network namespace
      hostAliases:  # static name resolution, appended to the pod's /etc/hosts
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:  # pin to nodes labelled for this role
        nodemanager: "true"
      tolerations:  # tolerate the bigdata taint, otherwise the pods cannot be scheduled
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"
      affinity:
        podAntiAffinity:  # never co-locate two nodemanager pods on the same node
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: nodemanager
              topologyKey: kubernetes.io/hostname

部署historyserver

yaml 复制代码
apiVersion: apps/v1
kind: Deployment
metadata:
  name: historyserver
  labels:
    app: historyserver
spec:
  selector:
    matchLabels:
      app: historyserver
  replicas: 1
  strategy:
    # hostNetwork pins ports 10020/19888 on the node, so a RollingUpdate
    # surge pod could never bind them; replace the pod in place instead.
    type: Recreate
  template:
    metadata:
      labels:
        app: historyserver
    spec:
      containers:
        - name: historyserver
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:  # size according to the capacity plan
            limits:
              cpu: 500m
              memory: 1Gi
          env:
            - name: HADOOP_LIBEXEC_DIR  # set here if not baked into the image
              value: /opt/hadoop/libexec
          command:  # foreground MapReduce job history server
            - "sh"
            - "-c"
            - "mapred historyserver"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime  # host Asia/Shanghai timezone
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:  # stop the daemon gracefully before the pod is destroyed
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "mapred --daemon stop historyserver"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:  # node-local storage directory
            path: /dfs
        - name: config  # config files come from the hadoop ConfigMap
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true  # run in the host network namespace
      hostAliases:  # static name resolution, appended to the pod's /etc/hosts
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:  # pin to the node labelled for this role
        historyserver: "true"
      tolerations:  # tolerate the bigdata taint, otherwise the pod cannot be scheduled
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

hadoop组件运行情况

相关推荐
hjnjmjkj几秒前
基于windows的docker-desktop安装kubenetes以及dashboard
docker·容器·kubernetes
fruge几秒前
git上传 项目 把node_modules也上传至仓库了,在文件.gitignore 中忽略node_modules 依然不行
大数据·git·elasticsearch
python资深爱好者33 分钟前
什么容错性以及Spark Streaming如何保证容错性
大数据·分布式·spark
B站计算机毕业设计超人2 小时前
计算机毕业设计hadoop+spark旅游景点推荐 旅游推荐系统 旅游可视化 旅游爬虫 景区客流量预测 旅游大数据 大数据毕业设计
大数据·hadoop·爬虫·深度学习·机器学习·数据可视化·推荐算法
qiquandongkh2 小时前
2025年股指期货和股指期权合约交割的通知!
大数据·金融·区块链
Ray.19983 小时前
优化 Flink 消费 Kafka 数据的速度:实战指南
大数据·flink·kafka
D愿你归来仍是少年3 小时前
Python解析 Flink Job 依赖的checkpoint 路径
大数据·python·flink
说私域4 小时前
利用开源AI智能名片2+1链动模式S2B2C商城小程序构建企业私域流量池的策略与实践
大数据·人工智能·小程序·开源
yinbp5 小时前
bboss v7.3.5来袭!新增异地灾备机制和Kerberos认证机制,助力企业数据安全
大数据·elasticsearch·微服务·etl·restclient·bboss
Karoku0665 小时前
【CI/CD】Jenkinsfile管理+参数化构建+邮件通知以及Jenkins + SonarQube 代码审查
运维·ci/cd·容器·kubernetes·jenkins·rancher