Running Big Data Components on Kubernetes: Hadoop

Configuration files

yaml
apiVersion: v1
kind: ConfigMap
metadata:
    name: hadoop
data:
    core-site.xml: |-
        <?xml version="1.0"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>fs.defaultFS</name>
                <value>hdfs://192.168.199.56:8020</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
            <property>
                <name>io.file.buffer.size</name>
                <value>131072</value>
            </property>
            <property>
                <name>hadoop.proxyuser.root.groups</name>
                <value>*</value>
            </property>
            <property>
                <name>hadoop.proxyuser.root.hosts</name>
                <value>*</value>
            </property>
        </configuration>
    hdfs-site.xml: |-
        <?xml version="1.0"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>dfs.webhdfs.enabled</name>
                <value>true</value>
            </property>
            <property>
                <name>dfs.datanode.use.datanode.hostname</name>
                <value>false</value>
            </property>
            <property>
                <name>dfs.replication</name>
                <value>3</value>
            </property>
            <property>
                <name>dfs.namenode.name.dir</name>
                <value>/dfs/nn</value>
            </property>
            <property>
                <name>dfs.namenode.checkpoint.dir</name>
                <value>/dfs/snn</value>
            </property>
            <property>
                <name>dfs.namenode.handler.count</name>
                <value>100</value>
            </property>
            <property>
                <name>dfs.datanode.data.dir</name>
                <value>/dfs/dn</value>
            </property>
            <property>
                <name>dfs.blocksize</name>
                <value>134217728</value>
            </property>
            <property>
                <name>dfs.namenode.rpc-bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                <name>dfs.namenode.servicerpc-bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                  <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
                  <value>false</value>
            </property>
        </configuration>
    mapred-site.xml: |-
        <?xml version="1.0"?>
        <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
        <configuration>
            <property>
                <name>mapreduce.framework.name</name>
                <value>yarn</value>
            </property>
            <property>
                <name>mapreduce.jobhistory.address</name>
                <value>192.168.199.58:10020</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
            <property>
                <name>mapreduce.jobhistory.webapp.address</name>
                <value>192.168.199.58:19888</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
        </configuration>
    yarn-site.xml: |-
        <configuration>
        <!-- Site specific YARN configuration properties -->
            <property>
                <name>yarn.resourcemanager.hostname</name>
                <value>192.168.199.56</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
            <property>
                <name>yarn.nodemanager.vmem-check-enabled</name>
                <value>false</value>
            </property>
            <property>
                <name>yarn.nodemanager.pmem-check-enabled</name>
                <value>false</value>
            </property>
            <property>
                <name>yarn.scheduler.minimum-allocation-mb</name>
                <value>1024</value>
            </property>
            <property>
                <name>yarn.scheduler.maximum-allocation-mb</name>
                <value>2048</value>
            </property>
            <property>
                <name>yarn.nodemanager.resource.memory-mb</name>
                <value>2048</value>
            </property>
            <property>
                <name>yarn.log-aggregation-enable</name>
                <value>true</value>
            </property>
            <property>
                <name>yarn.log.server.url</name>
                <value>http://192.168.199.58:19888/jobhistory/logs</value>
                <description>Fill in the IP of the node assigned to this role</description>
            </property>
            <property>
                <name>yarn.log-aggregation.retain-seconds</name>
                <value>604800</value>
            </property>
            <property>
                <name>yarn.nodemanager.aux-services</name>
                <value>mapreduce_shuffle</value>
            </property>
            <property>
                <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
                <value>org.apache.hadoop.mapred.ShuffleHandler</value>
            </property>
            <property>
                <name>yarn.resourcemanager.bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                <name>yarn.nodemanager.bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                <name>yarn.timeline-service.bind-host</name>
                <value>0.0.0.0</value>
            </property>
            <property>
                <name>yarn.application.classpath</name>
                <value>
                /opt/hadoop/etc/hadoop,
                /opt/hadoop/share/hadoop/common/*,
                /opt/hadoop/share/hadoop/common/lib/*,
                /opt/hadoop/share/hadoop/hdfs/*,
                /opt/hadoop/share/hadoop/hdfs/lib/*,
                /opt/hadoop/share/hadoop/mapreduce/*,
                /opt/hadoop/share/hadoop/mapreduce/lib/*,
                /opt/hadoop/share/hadoop/yarn/*,
                /opt/hadoop/share/hadoop/yarn/lib/*
                </value>
            </property>
            <property>
                <description>List of directories to store localized files in.</description>
                <name>yarn.nodemanager.local-dirs</name>
                <value>/var/lib/hadoop-yarn/cache/${user.name}/nm-local-dir</value>
            </property>
            <property>
                <description>Where to store container logs.</description>
                <name>yarn.nodemanager.log-dirs</name>
                <value>/opt/hadoop/logs/yarn/containers</value>
            </property>
            <property>
                <description>Where to aggregate logs to.</description>
                <name>yarn.nodemanager.remote-app-log-dir</name>
                <value>/opt/hadoop/logs/yarn/apps</value>
            </property>
            <property>
                <name>yarn.resourcemanager.scheduler.class</name>
                <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
            </property>
            <property>
                <name>yarn.scheduler.fair.preemption</name>
                <value>true</value>
            </property>
            <property>
                <name>yarn.scheduler.fair.preemption.cluster-utilization-threshold</name>
                <value>1.0</value>
            </property>
        </configuration>
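
With the ConfigMap defined, label each node for its role and taint the dedicated big-data nodes before applying the Deployments below: every manifest schedules with a nodeSelector and tolerates the bigdata taint. A minimal sketch, assuming the three nodes are named after their IPs as in the hostAliases entries; the role-to-node mapping here is illustrative, so adjust it to your own plan:

bash
# Role labels matched by the nodeSelector fields in the Deployments (illustrative mapping).
kubectl label node bigdata199056 namenode=true resourcemanager=true datanode=true nodemanager=true
kubectl label node bigdata199057 secondarynamenode=true datanode=true nodemanager=true
kubectl label node bigdata199058 historyserver=true datanode=true nodemanager=true
# Taint the dedicated nodes; every Deployment below tolerates bigdata=true:NoSchedule.
kubectl taint node bigdata199056 bigdata=true:NoSchedule
kubectl taint node bigdata199057 bigdata=true:NoSchedule
kubectl taint node bigdata199058 bigdata=true:NoSchedule
# Create the ConfigMap (assuming the manifest above is saved as hadoop-configmap.yaml).
kubectl apply -f hadoop-configmap.yaml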

Deploying HDFS

Deploying the namenode

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: namenode
  labels:
    app: namenode
spec:
  selector:
    matchLabels:
      app: namenode
  replicas: 1
  template:
    metadata:
      labels:
        app: namenode
    spec:
      initContainers:
        - name: dfs-init  # Use an initContainer to check whether the namenode needs formatting
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          env:
            - name: HADOOP_LIBEXEC_DIR  # If this variable was not baked into the image, set it here; it overrides the image's value
              value: /opt/hadoop/libexec
          command:  # Format the namenode only if it has not been initialized yet
            - "sh"
            - "-c"
            - "if [ ! -d /dfs/nn ];then mkdir /dfs/nn && hdfs namenode -format;fi"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs  # Mount the data directory at /dfs inside the pod
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml  # Mount the config files
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
      containers:
        - name: namenode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:  # Size resources according to your capacity plan
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:  # Namenode startup command; it currently stays in the foreground only by printing logs, to be improved
            - "sh"
            - "-c"
            - "hdfs namenode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime  # Mount the host's Asia/Shanghai timezone
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
          lifecycle:
            preStop:  # Gracefully stop the service before the pod is destroyed
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop namenode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:  # Mount the node's storage directory
            path: /dfs
        - name: config  # Mount the config files
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true  # Use the host's network namespace
      hostAliases:  # Hostname resolution for the pod; entries are added to /etc/hosts automatically
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:  # Schedule onto the node labeled for this role
        namenode: "true"
      tolerations:  # Tolerate the node taint; otherwise the pod cannot be scheduled onto it
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

Deploying the secondarynamenode

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: secondarynamenode
  labels:
    app: secondarynamenode
spec:
  selector:
    matchLabels:
      app: secondarynamenode
  replicas: 1
  template:
    metadata:
      labels:
        app: secondarynamenode
    spec:
      containers:
        - name: secondarynamenode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "hdfs secondarynamenode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop secondarynamenode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:
            path: /dfs
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        secondarynamenode: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

Deploying the datanode

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: datanode
  labels:
    app: datanode
spec:
  selector:
    matchLabels:
      app: datanode
  replicas: 3	# Adjust to match the number of planned datanode nodes
  template:
    metadata:
      labels:
        app: datanode
    spec:
      containers:
        - name: datanode
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 500m
              memory: 1Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "hdfs datanode"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "hdfs --daemon stop datanode"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:
            path: /dfs
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        datanode: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"
      affinity:
        podAntiAffinity:  # Anti-affinity: never schedule two datanode pods onto the same node
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: datanode
              topologyKey: kubernetes.io/hostname
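
Once the datanode pods are Running, HDFS health can be checked from inside the namenode pod. A sketch:

bash
# All three datanodes should appear as live nodes in the report.
kubectl exec deployment/namenode -- hdfs dfsadmin -report
# Quick write/read smoke test.
kubectl exec deployment/namenode -- sh -c "hdfs dfs -mkdir -p /tmp/smoke && hdfs dfs -ls /"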

Deploying YARN

Deploying the resourcemanager

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: resourcemanager
  labels:
    app: resourcemanager
spec:
  selector:
    matchLabels:
      app: resourcemanager
  replicas: 1
  template:
    metadata:
      labels:
        app: resourcemanager
    spec:
      containers:
        - name: resourcemanager
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 1000m
              memory: 2Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "yarn resourcemanager"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "yarn --daemon stop resourcemanager"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        resourcemanager: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

Deploying the nodemanager

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nodemanager
  labels:
    app: nodemanager
spec:
  selector:
    matchLabels:
      app: nodemanager
  replicas: 3
  template:
    metadata:
      labels:
        app: nodemanager
    spec:
      containers:
        - name: nodemanager
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 4000m
              memory: 8Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "yarn nodemanager"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "yarn --daemon stop nodemanager"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        nodemanager: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"
      affinity:
        podAntiAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            - labelSelector:
                matchLabels:
                  app: nodemanager
              topologyKey: kubernetes.io/hostname
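
Once the nodemanagers start, they register with the resourcemanager and appear in its node list. A sketch for confirming registration from inside the resourcemanager pod:

bash
# Each of the three nodemanagers should be listed as RUNNING.
kubectl exec deployment/resourcemanager -- yarn node -list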

Deploying the historyserver

yaml
apiVersion: apps/v1
kind: Deployment
metadata:
  name: historyserver
  labels:
    app: historyserver
spec:
  selector:
    matchLabels:
      app: historyserver
  replicas: 1
  template:
    metadata:
      labels:
        app: historyserver
    spec:
      containers:
        - name: historyserver
          image: hadoop:2.10.1
          imagePullPolicy: IfNotPresent
          resources:
            limits:
              cpu: 500m
              memory: 1Gi
          env:
            - name: HADOOP_LIBEXEC_DIR
              value: /opt/hadoop/libexec
          command:
            - "sh"
            - "-c"
            - "mapred historyserver"
          volumeMounts:
            - name: localtime
              mountPath: /etc/localtime
            - name: dfs
              mountPath: /dfs
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/core-site.xml
              subPath: core-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/hdfs-site.xml
              subPath: hdfs-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/yarn-site.xml
              subPath: yarn-site.xml
            - name: config
              mountPath: /opt/hadoop/etc/hadoop/mapred-site.xml
              subPath: mapred-site.xml
          lifecycle:
            preStop:
              exec:
                command:
                  - "sh"
                  - "-c"
                  - "mapred --daemon stop historyserver"
      volumes:
        - name: localtime
          hostPath:
            path: /usr/share/zoneinfo/Asia/Shanghai
        - name: dfs
          hostPath:
            path: /dfs
        - name: config
          configMap:
            name: hadoop
      restartPolicy: Always
      hostNetwork: true
      hostAliases:
        - ip: "192.168.199.56"
          hostnames:
            - "bigdata199056"
        - ip: "192.168.199.57"
          hostnames:
            - "bigdata199057"
        - ip: "192.168.199.58"
          hostnames:
            - "bigdata199058"
      nodeSelector:
        historyserver: "true"
      tolerations:
        - key: "bigdata"
          value: "true"
          operator: "Equal"
          effect: "NoSchedule"

Hadoop component status
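
A quick way to list every Hadoop pod and the node it landed on (a sketch):

bash
kubectl get pods -o wide -l 'app in (namenode,secondarynamenode,datanode,resourcemanager,nodemanager,historyserver)'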
