Trino On K8S (DockerHub)

Table of Contents

    • Trino On K8S (DockerHub)
      • Prerequisites
      • Deploy Trino
        • Download the Helm chart
        • Install Trino
        • Mount the core-site.xml and hdfs-site.xml configuration
        • ~~(Skip) Modify the coordinator/worker ConfigMaps~~
        • ~~(Skip) Modify the Service~~
        • ~~(Skip) Modify the coordinator/worker Deployments~~
        • Verification
        • Scaling workers
      • Integrate Paimon (Paimon 0.8 currently only supports Trino 420/427)
        • Add a Paimon catalog
        • Rebuild the image to bundle the Paimon jar
        • ~~(Skip) Mount the Paimon jar via HostPath~~

Trino On K8S (DockerHub)

This deployment is based on the DockerHub Trino image. Adding the Paimon jar via a HostPath mount requires distributing the jar to every k8s node.

Currently, Paimon 0.8 only supports Trino 420/427.

Prerequisites

Pull the Trino image from DockerHub and push it to the private Harbor registry:

```shell
docker pull trinodb/trino:420
docker image tag  trinodb/trino:420  10.83.195.8:1443/dockerhub/trino:420

docker push 10.83.195.8:1443/dockerhub/trino:420
```
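
To sanity-check the push, the image can be pulled back from Harbor (optional; uses the same registry address as above):

```shell
# remove the local tag, then pull it back from the private registry
docker rmi 10.83.195.8:1443/dockerhub/trino:420
docker pull 10.83.195.8:1443/dockerhub/trino:420
```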

Deploy Trino

Download the Helm chart

```shell
# https://artifacthub.io/packages/helm/trino/trino
helm repo add trino https://trinodb.github.io/charts/
helm pull trino/trino --version 0.19.0

tar -zxvf trino-0.19.0.tgz
mv trino  trino-dockerhub
cd trino-dockerhub
```

Edit values.yaml:

```yaml
# Point the image at the Harbor copy pushed above
image:
  registry: "10.83.195.8:1443"   #  docker push 10.83.195.6/dockerhub/trino:444
  repository: dockerhub/trino    # docker push 10.83.195.6/dockerhub/trino:444
  tag: "420"   # docker push 10.83.195.6/dockerhub/trino:444

# External access via NodePort
service:
  #type: ClusterIP
  type: NodePort
  port: 8080
  nodePort: 38080  # default k8s NodePort range: 30000-32767

# Number of worker pods
server:
  workers: 2

# Add the Hive catalog
# hive.config.resources must match the ConfigMap mount path configured later
catalogs:
  hive: |-
    connector.name=hive
    hive.metastore.uri=thrift://10.83.192.8:9083,thrift://10.83.192.9:9083
    hive.config.resources=/etc/trino/hadoop/core-site.xml,/etc/trino/hadoop/hdfs-site.xml
    # fs.hadoop.enabled is only recognized by newer Trino releases; remove it on 420 if the catalog fails to load
    fs.hadoop.enabled=true
```
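
Before installing, it can help to render the chart locally and confirm the overrides took effect (a quick check; `helm template` renders manifests without touching the cluster):

```shell
# the rendered manifests should show the Harbor image and the 38080 nodePort
helm template trino-dockerhub ./ -n trino-dockerhub | grep -E 'image:|nodePort:'
```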
Install Trino

```shell
# Run from the directory containing Chart.yaml
helm install trino-dockerhub ./ -n trino-dockerhub --create-namespace

# In the Harbor web UI, set the image's project to public; otherwise pulls fail with a permissions error

# After rebuilding the image, delete the old one from every node first
# ansible -i /opt/ansible/nodes all -m shell -a "docker rmi 10.83.195.6/bigdata/trino:420"

# Uninstall
# helm uninstall trino-dockerhub -n trino-dockerhub

# Upgrade
helm upgrade trino-dockerhub ./ -n trino-dockerhub

# Check pods and services
kubectl get po,svc -n trino-dockerhub

# On errors, inspect pod events and logs
coordinator_name=`kubectl get pods -n trino-dockerhub|grep coordinator|awk '{print $1}'`
kubectl describe po $coordinator_name -n trino-dockerhub
kubectl logs $coordinator_name -n trino-dockerhub --all-containers

# Open a Trino CLI session
coordinator_name=`kubectl get pods -n trino-dockerhub|grep coordinator|awk '{print $1}'`
kubectl exec -it $coordinator_name -n trino-dockerhub -- /usr/bin/trino --server http://trino-dockerhub:8080 --catalog=hive --schema=default --user=admin

# Querying Trino's built-in system catalog works
select * from system.runtime.nodes;

# Querying a Hive table fails: Query 20240410_074243_00004_6sab9 failed: Failed to list directory: hdfs://bigbigworld/user/hive/warehouse/test.db/xxx
# The fix is to mount core-site.xml and hdfs-site.xml (next section)
select * from hive.test.zxl_tmp;
```
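
With the NodePort service in place, the coordinator can also be probed from outside the cluster (a sketch; `<node-ip>` is a placeholder for any k8s node address):

```shell
# Trino's REST API reports cluster info on /v1/info; 38080 is the nodePort from values.yaml
curl http://<node-ip>:38080/v1/info
```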
Mount the core-site.xml and hdfs-site.xml configuration

Create the ConfigMap:

```shell
# Replace any hostnames in core-site.xml and hdfs-site.xml with IP addresses first

kubectl create configmap trino-hadoop-config \
  --from-file=core-site.xml \
  --from-file=hdfs-site.xml \
  -n trino-dockerhub   # must match the namespace Trino is deployed in
```
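
If the files still reference hostnames, a quick way to rewrite them before creating the ConfigMap (a sketch; `nn1.example.com` / `nn2.example.com` are hypothetical hostnames, substitute your own):

```shell
# swap hostnames for IPs so pods don't depend on external DNS for Hadoop hosts
sed -i 's/nn1.example.com/10.83.192.6/g; s/nn2.example.com/10.83.192.7/g' core-site.xml hdfs-site.xml

# afterwards, confirm both files landed in the ConfigMap
kubectl describe configmap trino-hadoop-config -n trino-dockerhub | grep -E 'core-site.xml|hdfs-site.xml'
```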

Edit the Helm values.yaml:

```yaml
configMounts:
  - name: hadoop-config
    configMap: trino-hadoop-config
    path: /etc/trino/hadoop
    

# Run HDFS access as the admin user
env:
  - name: HADOOP_USER_NAME  # env-var form of the -DHADOOP_USER_NAME jvm flag
    value: "admin"

~~(Skip) Mount core-site.xml / hdfs-site.xml via a chart template~~

```yaml
# vim templates/configmap-hadoop.yaml
# Replace any hostnames in core-site.xml and hdfs-site.xml with IP addresses

apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "trino.fullname" . }}-hadoop
  labels:
    app.kubernetes.io/name: {{ include "trino.name" . }}
    helm.sh/chart: {{ include "trino.chart" . }}
    app.kubernetes.io/instance: {{ .Release.Name }}
data:
  core-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>fs.defaultFS</name>
        <value>hdfs://bigbigworld</value>
      </property>
      <property>
        <name>ha.zookeeper.quorum</name>
        <value>10.83.192.6:2181,10.83.192.7:2181,10.83.192.8:2181</value>
      </property>
       <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
      </property>
        <property>
        <name>fs.trash.interval</name>
        <value>4320</value>
      </property>
      <property>
        <name>fs.trash.checkpoint.interval</name>
        <value>60</value>
      </property>
      <property>
        <name>io.native.lib.available</name>
        <value>true</value>
      </property>
      <property>
        <name>net.topology.script.file.name</name>
        <value>/opt/apache/hadoop/etc/hadoop/topology.sh</value>
      </property>
       <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
          sshfence
          shell(/bin/true)
        </value>
      </property>
      <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>file:///home/admin/.ssh/id_rsa</value>
      </property>
      <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>1000</value>
      </property>
      <property>
        <name>hadoop.proxyuser.admin.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.admin.users</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.admin.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hive.hosts</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.hive.users</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.yarn.groups</name>
        <value>*</value>
      </property>
      <property>
        <name>hadoop.proxyuser.yarn.users</name>
        <value>*</value>
      </property>
      <property>
        <name>ipc.server.read.threadpool.size</name>
        <value>5</value>
      </property>

      <property>
        <name>ipc.server.listen.queue.size</name>
        <value>1024</value>
      </property>

    </configuration>

  hdfs-site.xml: |
    <?xml version="1.0"?>
    <?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
    <configuration>
      <property>
        <name>dfs.replication</name>
        <value>3</value>
      </property>
      <property>
        <name>dfs.nameservices</name>
        <value>bigbigworld</value>
      </property>
        <property>
        <name>dfs.ha.namenodes.bigbigworld</name>
        <value>nn1,nn2</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-address.bigbigworld.nn1</name>
        <value>10.83.192.6:8020</value>
      </property>
      <property>
        <name>dfs.namenode.rpc-address.bigbigworld.nn2</name>
        <value>10.83.192.7:8020</value>
      </property>
      <property>
        <name>dfs.namenode.http-address</name>
        <value>0.0.0.0:9870</value>
      </property>
      <property>
        <name>dfs.namenode.http-address.bigbigworld.nn1</name>
        <value>10.83.192.6:9870</value>
      </property>
      <property>
        <name>dfs.namenode.http-address.bigbigworld.nn2</name>
        <value>10.83.192.7:9870</value>
      </property>
      <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://10.83.192.6:8485;10.83.192.7:8485;10.83.192.8:8485/bigbigworld</value>
      </property>
      <property>
        <name>dfs.permissions.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.client.failover.proxy.provider.bigbigworld</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
      </property>
      <property>
        <name>dfs.ha.automatic-failover.enabled.bigbigworld</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/data1/hadoop/dfs/journalnode</value>
      </property>
      <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:///data1/hadoop/dfs/namenode</value>
      </property>
      <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:///data1/hadoop/dfs/datanode</value>
      </property>
      <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.namenode.handler.count</name>
        <value>192</value>
      </property>
      <property>
        <name>dfs.datanode.handler.count</name>
        <value>96</value>
      </property>
      <property>
        <name>dfs.datanode.max.transfer.threads</name>
        <value>16384</value>
      </property>
      <property>
        <name>dfs.datanode.socket.write.timeout</name>
        <value>480000</value>
      </property>
      <property>
        <name>dfs.client.socket-timeout</name>
        <value>300000</value>
      </property>
      <property>
        <name>dfs.datanode.balance.bandwidthPerSec</name>
        <value>209715200</value>
      </property>
      <property>
        <name>dfs.datanode.balance.max.concurrent.moves</name>
        <value>64</value>
      </property>
      <property>
        <name>dfs.namenode.replication.max-streams</name>
        <value>128</value>
      </property>
      <property>
        <name>dfs.namenode.replication.max-streams-hard-limit</name>
        <value>512</value>
      </property>
      <property>
        <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
        <value>512</value>
      </property>
      <property>
        <name>dfs.hosts</name>
        <value>/opt/apache/hadoop/etc/hadoop/dfs-hosts.includes</value>
      </property>
      <property>
        <name>dfs.hosts.exclude</name>
        <value>/opt/apache/hadoop/etc/hadoop/dfs-hosts.excludes</value>
      </property>
      <property>
        <name>dfs.balancer.moverThreads</name>
        <value>2000</value>
      </property>
      <property>
        <name>dfs.balancer.max-size-to-move</name>
        <value>107374182400</value>
      </property>
      <property>
        <name>dfs.balancer.getBlocks.min-block-size</name>
        <value>1048576</value>
      </property>
      <property>
        <name>dfs.block.invalidate.limit</name>
        <value>2000</value>
      </property>
      <property>
        <name>dfs.namenode.acls.enabled</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.blockreport.incremental.intervalMsec</name>
        <value>50</value>
      </property>
      <property>
        <name>dfs.namenode.checkpoint.txns</name>
        <value>3000000</value>
      </property>
      <property>
        <name>dfs.qjournal.write-txns.timeout.ms</name>
        <value>90000</value>
      </property>
      <property>
        <name>dfs.qjournal.start-segment.timeout.ms</name>
        <value>90000</value>
      </property>
      <property>
        <name>dfs.qjournal.select-input-streams.timeout.ms</name>
        <value>90000</value>
      </property>
      <property>
        <name>dfs.namenode.audit.log.async</name>
        <value>true</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-address.bigbigworld.nn1</name>
        <value>10.83.192.6:8041</value>
      </property>
      <property>
        <name>dfs.namenode.servicerpc-address.bigbigworld.nn2</name>
        <value>10.83.192.7:8041</value>
      </property>
    </configuration>
```
~~(Skip) Modify the coordinator/worker ConfigMaps~~

```shell
# vim templates/configmap-coordinator.yaml
# vim templates/configmap-worker.yaml
# Add the line below to the jvm.config section of both files
# HADOOP_USER_NAME is the Hadoop cluster admin user
-DHADOOP_USER_NAME=admin
```
~~(Skip) Modify the Service~~

```yaml
# vim templates/service.yaml

spec:
  type: {{ .Values.service.type }}
  ports:
    - port: {{ .Values.service.port }}
      targetPort: http
      protocol: TCP
      name: http
      # add the following line
      nodePort: {{ .Values.service.nodePort }}
```
~~(Skip) Modify the coordinator/worker Deployments~~

```yaml
# vim templates/deployment-coordinator.yaml
# vim templates/deployment-worker.yaml
# Add the following to the volumes and volumeMounts sections of both files

      volumes:
        # add the following
        - name: core-site
          configMap:
            name: {{ include "trino.fullname" . }}-hadoop
        - name: hdfs-site
          configMap:
            name: {{ include "trino.fullname" . }}-hadoop

          volumeMounts:
            # add the following
            - mountPath: /tmp/hdfs-site.xml
              name: hdfs-site
              subPath: hdfs-site.xml
            - mountPath: /tmp/core-site.xml
              name: core-site
              subPath: core-site.xml
```
Verification

```shell
# Open a Trino CLI session
coordinator_name=`kubectl get pods -n trino-dockerhub|grep coordinator|awk '{print $1}'`
# kubectl exec -it $coordinator_name -n trino-dockerhub -- bash
kubectl exec -it $coordinator_name -n trino-dockerhub -- /usr/bin/trino --server http://127.0.0.1:8080 --catalog=hive --schema=default --user=admin

kubectl exec -it $coordinator_name -n trino-dockerhub -- /usr/bin/trino --server http://trino-dockerhub:8080 --catalog=hive --schema=default --user=admin

# Querying Trino's built-in system catalog works
select * from system.runtime.nodes;

# Reading and writing Hive tables now works
```
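
A round-trip write can confirm end-to-end access (a sketch; `hive.test.trino_smoke` is a hypothetical table name; run this inside the Trino CLI session above):

```shell
# create, read back, and drop a throwaway table
create table hive.test.trino_smoke as select 1 as id;
select * from hive.test.trino_smoke;
drop table hive.test.trino_smoke;
```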
Scaling workers

```shell
kubectl get deployments,statefulsets -n trino-dockerhub | grep trino
# Scale out/in (the worker deployment is named <release-name>-trino-worker)
kubectl scale deploy trino-dockerhub-trino-worker --replicas=3 -n trino-dockerhub
```
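
After scaling, the new workers should register with the coordinator within a few seconds (check from the Trino CLI session used above):

```shell
# the count of non-coordinator nodes should match the replica count
select count(*) from system.runtime.nodes where coordinator = false;
```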

Integrate Paimon (Paimon 0.8 currently only supports Trino 420/427)

https://trino.io/docs/current/connector.html

Add a Paimon catalog

```yaml
# vim values.yaml

# Depending on the chart version, catalogs are defined under catalogs (used above) or additionalCatalogs;
# hive.config.resources must point at wherever the Hadoop files are actually mounted
additionalCatalogs:
  hive: |-
    connector.name=hive
    hive.metastore.uri=thrift://10.83.192.8:9083,thrift://10.83.192.9:9083
    hive.config.resources=/tmp/core-site.xml,/tmp/hdfs-site.xml
  paimon: |-
    connector.name=paimon
    warehouse=hdfs://bigbigworld/user/hive/warehouse
    hive.config.resources=/tmp/core-site.xml,/tmp/hdfs-site.xml
```
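
Once the plugin is in place (next section) and the release upgraded, the catalog can be smoke-tested from the Trino CLI (a sketch; `paimon.test.some_table` is a hypothetical table):

```shell
# run inside the Trino CLI session
show schemas from paimon;
select * from paimon.test.some_table limit 10;
```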
Rebuild the image to bundle the Paimon jar

```dockerfile
# vim Dockerfile

# The base image and the Paimon plugin build must target the same Trino release;
# this example uses a Paimon 1.0 snapshot built for Trino 440 -- adjust both to your version
FROM 192.168.234.10:80/trinodb/trino:476

ADD paimon-trino-440-1.0-20241217.000333-26-plugin.tar.gz /usr/lib/trino/plugin/
```
```shell
# Build the image
docker build -t 192.168.234.10:80/trinodb/trino-paimon:440 -f Dockerfile . --no-cache
# Push it to the registry
docker push 192.168.234.10:80/trinodb/trino-paimon:440
```
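
To roll out the rebuilt image, point the chart's image settings at it before upgrading (a sketch matching the registry and tag used in the build above):

```shell
# vim values.yaml -- then upgrade the release
# image:
#   registry: "192.168.234.10:80"
#   repository: trinodb/trino-paimon
#   tag: "440"
helm upgrade trino-dockerhub ./ -n trino-dockerhub
```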
~~(Skip) Mount the Paimon jar via HostPath~~

```shell
# Download the Paimon plugin
wget https://repository.apache.org/content/groups/snapshots/org/apache/paimon/paimon-trino-427/0.8-SNAPSHOT/paimon-trino-427-0.8-20240516.000452-23-plugin.tar.gz
tar -zxvf paimon-trino-427-0.8-20240516.000452-23-plugin.tar.gz -C /data/
# /lib/trino/plugin/paimon-trino-427-0.8-20240514.000510-22.jar

# vim templates/deployment-coordinator.yaml
# vim templates/deployment-worker.yaml
# Add the following to the volumes and volumeMounts sections of both files

      volumes:
        - name: paimon-jar
          hostPath:
            path: /data/paimon
            type: DirectoryOrCreate

          volumeMounts:
            # add the following
            - mountPath: /lib/trino/plugin/paimon
              name: paimon-jar
              

# Upgrade the release
helm upgrade trino-dockerhub ./ -n trino-dockerhub
```
```shell
# Run from the directory containing Chart.yaml
helm install trino-dockerhub ./ -n trino-dockerhub --create-namespace


# Uninstall
# helm uninstall trino-dockerhub -n trino-dockerhub

kubectl get po -n trino-dockerhub

coordinator_name=`kubectl get pods -n trino-dockerhub|grep coordinator|awk '{print $1}'`
kubectl exec -it $coordinator_name -n trino-dockerhub -- sh

kubectl describe po $coordinator_name  -n trino-dockerhub 
kubectl logs $coordinator_name -n trino-dockerhub --all-containers
```
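
Finally, confirm the Paimon plugin directory is visible inside the pod; Trino loads every directory under /usr/lib/trino/plugin at startup:

```shell
# paimon should be listed alongside the built-in connector plugins
kubectl exec -it $coordinator_name -n trino-dockerhub -- ls /usr/lib/trino/plugin | grep -i paimon
```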
运维·docker·容器