Preface:
When you run many services and applications on a Kubernetes cluster, a log collection system helps you quickly categorize and analyze the large volume of log data generated by Pods. A popular logging solution for Kubernetes is the Elasticsearch, Fluentd, and Kibana (EFK) stack.
The difference between Fluentd and Logstash:
Logstash:
- Leans toward a "centralized" architecture: logs are collected and processed in one central place
- Written in Java and runs on the JVM, so its memory footprint is large; on the other hand it is very strong at complex log processing (e.g. multi-level Grok regex matching)
- Commonly paired with Beats (lightweight agents):
Filebeat → Logstash → Elasticsearch
Fluentd:
- Designed natively for distributed/containerized environments
- Lighter and more resource-efficient: in a Kubernetes cluster, a Fluentd DaemonSet Pod typically uses only a few dozen MB of memory per node
- Commonly paired with Fluent Bit:
Fluent Bit → Fluentd → Elasticsearch
For ELK basics, please refer to my other articles.
https://xingzhibang.top/archives/elkxi-lie-zhi-yi-elasticsearch-bu-shu-shi-zhan
1. Install the elasticsearch component
1.1 Create the namespace
[root@k8s-master /manifests]# kubectl create ns kube-logging
1.2 Create the NFS share and RBAC authorization
## Here we use NFS to provide shared storage
--- 1. Install the NFS server
--- 2. Create an NFS shared directory on k8s-master
[root@k8s-master /manifests]# mkdir -p /data/nfs/es/
## Edit the /etc/exports file
[root@k8s-master /manifests]# cat /etc/exports
/data/nfs/es *(rw,sync,no_root_squash)
## Reload the configuration so it takes effect
[root@k8s-master /manifests]# systemctl restart nfs
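Before wiring this into Kubernetes, it is worth confirming that the export is actually visible. A minimal check, assuming the nfs-utils tooling is installed and the NFS server IP is 192.168.0.160 as in this setup:
## Confirm the export is active on the server side
exportfs -v
## Query the export list as a client would see it
showmount -e 192.168.0.160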
--- 3. Create the ServiceAccount for the NFS storage provisioner
[root@k8s-master /manifests/serviceaccount/es]# cat serviceaccount.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: nfs-provisioner
--- 4. Grant the ServiceAccount the required RBAC permissions (a short apply-and-verify sketch follows the manifest below)
[root@k8s-master /manifests/serviceaccount/es]# cat rbac.yaml
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: nfs-provisioner-runner
rules:
- apiGroups: [""]
resources: ["persistentvolumes"]
verbs: ["get", "list", "watch", "create", "delete"]
- apiGroups: [""]
resources: ["persistentvolumeclaims"]
verbs: ["get", "list", "watch", "update"]
- apiGroups: ["storage.k8s.io"]
resources: ["storageclasses"]
verbs: ["get", "list", "watch"]
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "update", "patch"]
- apiGroups: [""]
resources: ["services", "endpoints"]
verbs: ["get"]
- apiGroups: ["extensions"]
resources: ["podsecuritypolicies"]
resourceNames: ["nfs-provisioner"]
verbs: ["use"]
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: run-nfs-provisioner
subjects:
- kind: ServiceAccount
name: nfs-provisioner
namespace: default
roleRef:
kind: ClusterRole
name: nfs-provisioner-runner
apiGroup: rbac.authorization.k8s.io
---
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: leader-locking-nfs-provisioner
rules:
- apiGroups: [""]
resources: ["endpoints"]
verbs: ["get", "list", "watch", "create", "update", "patch"]
---
kind: RoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: leader-locking-nfs-provisioner
subjects:
- kind: ServiceAccount
name: nfs-provisioner
namespace: default
roleRef:
kind: Role
name: leader-locking-nfs-provisioner
apiGroup: rbac.authorization.k8s.io
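The ServiceAccount and RBAC manifests above still need to be applied; a minimal sketch, assuming the file names shown above and that everything lives in the default namespace:
kubectl apply -f serviceaccount.yaml
kubectl apply -f rbac.yaml
## Confirm the ServiceAccount exists and is allowed to create PersistentVolumes
kubectl get sa nfs-provisioner
kubectl auth can-i create persistentvolumes --as=system:serviceaccount:default:nfs-provisioner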
1.3 Create a StorageClass for dynamic storage provisioning
You can refer to my earlier article:
https://xingzhibang.top/archives/k8sde-pv-pvcde-cun-chu-ce-lue
## After pulling the open-source provisioner repository
--- 1. Edit deployment.yaml
[root@k8s-master /manifests/pv/k8s-external-storage/nfs-client/deploy]# cat deployment.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: nfs-provisioner
spec:
replicas: 1
strategy:
type: Recreate
selector:
matchLabels:
app: nfs-provisioner
template:
metadata:
labels:
app: nfs-provisioner
spec:
serviceAccountName: nfs-provisioner
containers:
- name: nfs-provisioner
image: 192.168.0.77:32237/uat/nfs-subdir-external-provisioner:latest
volumeMounts:
- name: nfs-client-root
mountPath: /persistentvolumes
env:
- name: PROVISIONER_NAME
value: example.com/nfs
- name: NFS_SERVER
## IP address of the NFS server
value: 192.168.0.160
- name: NFS_PATH
## NFS shared directory
value: /data/nfs/es
volumes:
- name: nfs-client-root
nfs:
server: 192.168.0.160
path: /data/nfs/es
--- 2. Edit class.yaml
[root@k8s-master /manifests/pv/k8s-external-storage/nfs-client/deploy]# cat class.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
name: do-block-storage
provisioner: example.com/nfs
parameters:
archiveOnDelete: "true"
## provisioner: example.com/nfs
## This value must match the PROVISIONER_NAME value configured in the nfs provisioner Deployment above
--- 3. Apply the provisioner Deployment and the StorageClass (a short verification sketch follows these two commands)
[root@k8s-master /manifests/pv/k8s-external-storage/nfs-client/deploy]# kubectl apply -f deployment.yaml
[root@k8s-master /manifests/pv/k8s-external-storage/nfs-client/deploy]# kubectl apply -f class.yaml
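To confirm dynamic provisioning really works before moving on, you can check the provisioner Pod and the StorageClass and, optionally, create a throwaway PVC. This is only a sketch; the PVC name test-claim is hypothetical:
kubectl get pods -l app=nfs-provisioner
kubectl get storageclass do-block-storage
## Optional: create a temporary PVC, check that a PV is bound automatically, then clean up
cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: test-claim
spec:
  storageClassName: do-block-storage
  accessModes: ["ReadWriteOnce"]
  resources:
    requests:
      storage: 1Mi
EOF
kubectl get pvc test-claim
kubectl delete pvc test-claim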
1.4 Create the headless Service
[root@k8s-master /manifests/service]# cat elasticsearch_svc.yaml
kind: Service
apiVersion: v1
metadata:
name: elasticsearch
namespace: kube-logging
labels:
app: elasticsearch
spec:
selector:
app: elasticsearch
clusterIP: None
ports:
- name: rest
port: 9200
- name: inter-node
port: 9300
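Because clusterIP is set to None this is a headless Service: DNS returns the individual Pod IPs instead of a virtual IP, which is what gives each StatefulSet Pod a stable name of the form es-cluster-0.elasticsearch.kube-logging.svc.<cluster-domain>. A minimal sketch for applying it:
kubectl apply -f elasticsearch_svc.yaml
kubectl get svc elasticsearch -n kube-logging
## Per-Pod DNS records only appear once the StatefulSet in section 1.5 is running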
1.5 Create the elasticsearch cluster with a StatefulSet
[root@k8s-master /manifests/statefulset]# cat es-statefulset.yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
name: es-cluster
namespace: kube-logging
spec:
serviceName: elasticsearch
replicas: 3
selector:
matchLabels:
app: elasticsearch
template:
metadata:
labels:
app: elasticsearch
spec:
containers:
- name: elasticsearch
## The image here comes from my private registry
image: 192.168.0.77:32237/elastic/elasticsearch:7.17.5
resources:
limits:
cpu: 1000m
requests:
cpu: 100m
ports:
## The container exposes ports 9200 and 9300; the port names must match the ones defined in the Service above
- containerPort: 9200
name: rest
protocol: TCP
- containerPort: 9300
name: inter-node
protocol: TCP
## Declare the data persistence directory via volumeMounts
volumeMounts:
- name: data
mountPath: /usr/share/elasticsearch/data
env:
- name: cluster.name
value: k8s-logs
- name: node.name
valueFrom:
fieldRef:
fieldPath: metadata.name
## This makes each Pod in the StatefulSet reachable at the DNS addresses below; note that your cluster domain may differ
- name: discovery.seed_hosts
value: "es-cluster-0.elasticsearch.kube-logging.svc.xingzhibang.top,es-cluster-1.elasticsearch.kube-logging.svc.xingzhibang.top,es-cluster-2.elasticsearch.kube-logging.svc.xingzhibang.top"
- name: cluster.initial_master_nodes
value: "es-cluster-0,es-cluster-1,es-cluster-2"
- name: ES_JAVA_OPTS
value: "-Xms512m -Xmx512m"
initContainers:
## Change the owner and group of the Elasticsearch data directory to 1000:1000. By default Kubernetes mounts the data directory as root, which would leave it inaccessible to Elasticsearch
- name: fix-permissions
## The image here comes from my private registry
image: 192.168.0.77:32237/gpaas/busybox:latest
command: ["sh", "-c", "chown -R 1000:1000 /usr/share/elasticsearch/data"]
securityContext:
privileged: true
volumeMounts:
- name: data
mountPath: /usr/share/elasticsearch/data
## Increase the operating system's limit on mmap counts; the default may be too low and cause out-of-memory errors
- name: increase-vm-max-map
image: 192.168.0.77:32237/gpaas/busybox:latest
command: ["sysctl", "-w", "vm.max_map_count=262144"]
securityContext:
privileged: true
## Run ulimit to raise the maximum number of open file descriptors (note: a ulimit set in an init container only applies to that container's own shell, so you may also need to raise the limit on the nodes themselves)
- name: increase-fd-ulimit
image: 192.168.0.77:32237/gpaas/busybox:latest
command: ["sh", "-c", "ulimit -n 65536"]
securityContext:
privileged: true
volumeClaimTemplates:
- metadata:
name: data
labels:
app: elasticsearch
spec:
accessModes: [ "ReadWriteOnce" ]
## storageClassName here must match the name of the StorageClass created above
storageClassName: do-block-storage
resources:
requests:
storage: 10Gi
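The StatefulSet manifest still has to be applied and the rollout watched; a minimal sketch, assuming the file name above:
kubectl apply -f es-statefulset.yaml
## StatefulSet Pods come up one at a time: es-cluster-0, then es-cluster-1, then es-cluster-2
kubectl rollout status statefulset/es-cluster -n kube-logging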
## Check whether the es Pods were created successfully
[root@k8s-master /manifests/statefulset]# kubectl get pod -n kube-logging
NAME READY STATUS RESTARTS AGE
es-cluster-0 1/1 Running 1 (4h29m ago) 4h46m
es-cluster-1 1/1 Running 0 4h46m
es-cluster-2 1/1 Running 1 (4h29m ago) 4h47m
[root@k8s-master /manifests/statefulset]# kubectl get svc -n kube-logging
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
elasticsearch ClusterIP None <none> 9200/TCP,9300/TCP 3h59m
## Once the Pods are up, you can verify the elasticsearch cluster through its REST API. Use the following command to forward local port 9200 to the corresponding port on an Elasticsearch node (e.g. es-cluster-0):
[root@k8s-master /manifests/statefulset]# kubectl port-forward es-cluster-0 9200:9200 --namespace=kube-logging
Forwarding from 127.0.0.1:9200 -> 9200
Forwarding from [::1]:9200 -> 9200
...
## In a separate terminal window (open a new k8s-master terminal), run the following request
[root@k8s-master /manifests/statefulset]# curl http://localhost:9200/_cluster/state?pretty
{
"cluster_name" : "k8s-logs",
"cluster_uuid" : "PwbKiXLqRXuJT5jZupPMnQ",
"version" : 63,
"state_uuid" : "9rALebD0RkaESqwkS4sbRg",
....
}
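Besides _cluster/state, the _cluster/health and _cat/nodes endpoints give a quicker pass/fail signal; with all three nodes joined you would expect status green and number_of_nodes 3. A sketch, reusing the same port-forward:
curl http://localhost:9200/_cluster/health?pretty
curl http://localhost:9200/_cat/nodes?v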
## If you get a 503, use nslookup to check whether DNS resolution is correct
[root@k8s-master /manifests/statefulset]# kubectl run dns-test --image=busybox:1.28 --rm -it --restart=Never -- /bin/sh
/ # cat /etc/resolv.conf
nameserver 10.200.0.10
search default.svc.xingzhibang.top svc.xingzhibang.top xingzhibang.top
options ndots:5
/ # nslookup es-cluster-0.elasticsearch.kube-logging.svc.xingzhibang.top
Server: 10.200.0.10
Address 1: 10.200.0.10 kube-dns.kube-system.svc.xingzhibang.top
Name: es-cluster-0.elasticsearch.kube-logging.svc.xingzhibang.top
Address 1: 10.100.58.203 es-cluster-0.elasticsearch.kube-logging.svc.xingzhibang.top
2. Install the kibana component
[root@k8s-master /manifests/deployment]# cat kibana.yaml
apiVersion: apps/v1
kind: Deployment
metadata:
name: kibana
namespace: kube-logging
labels:
app: kibana
spec:
replicas: 1
selector:
matchLabels:
app: kibana
template:
metadata:
labels:
app: kibana
spec:
containers:
- name: kibana
## The image here comes from my private registry
image: 192.168.0.77:32237/elastic/kibana:7.17.5
resources:
limits:
cpu: 1000m
requests:
cpu: 100m
env:
- name: ELASTICSEARCH_URL
## Make sure the DNS name and port here are correct (note: the Kibana 7.x image expects ELASTICSEARCH_HOSTS rather than ELASTICSEARCH_URL, so you may need to rename this variable)
value: http://elasticsearch.kube-logging.svc.xingzhibang.top:9200
ports:
- containerPort: 5601
[root@k8s-master /manifests/service]# cat kibana.yaml
apiVersion: v1
kind: Service
metadata:
name: kibana
namespace: kube-logging
labels:
app: kibana
spec:
ports:
- port: 5601
targetPort: 5601
selector:
app: kibana
type: NodePort
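Both Kibana manifests then need to be applied and the rollout waited on; a minimal sketch, assuming the directory layout shown in the prompts above:
kubectl apply -f /manifests/deployment/kibana.yaml
kubectl apply -f /manifests/service/kibana.yaml
kubectl rollout status deployment/kibana -n kube-logging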
[root@k8s-master /manifests/service]# kubectl get svc -n kube-logging
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
elasticsearch ClusterIP None <none> 9200/TCP,9300/TCP 4h19m
kibana NodePort 10.200.145.233 <none> 5601:31316/TCP 4h19m
## Open http://<any node IP>:31316 in a browser (31316 is the NodePort assigned above)
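If you prefer to check from the command line first, Kibana exposes a status API; a sketch, with <node-ip> standing in for any node's address:
curl -s http://<node-ip>:31316/api/status | head -c 200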
3. Install the fluentd component
--- 1. Create the fluentd ConfigMap
[root@k8s-master /manifests/configmap]# cat fluentd-configmap.yaml
apiVersion: v1
kind: ConfigMap
metadata:
name: fluentd-config
namespace: kube-logging
data:
fluent.conf: |
# Input: tail the container log files
<source>
@type tail
@id in_tail_container_logs
path /var/log/containers/*.log
pos_file /var/log/fluentd-containers.log.pos
tag kubernetes.*
read_from_head true
<parse>
@type json
time_format %Y-%m-%dT%H:%M:%S.%NZ
</parse>
</source>
# Filter: enrich records with Kubernetes metadata
<filter kubernetes.**>
@type kubernetes_metadata
@id filter_kube_metadata
</filter>
# Output: send the records to Elasticsearch
<match **>
@type elasticsearch
@id out_es
@log_level info
include_tag_key true
host "#{ENV['FLUENT_ELASTICSEARCH_HOST']}"
port "#{ENV['FLUENT_ELASTICSEARCH_PORT']}"
scheme "#{ENV['FLUENT_ELASTICSEARCH_SCHEME']}"
logstash_format true
logstash_prefix logstash
<buffer>
@type file
path /var/log/fluentd-buffers/kubernetes.system.buffer
flush_mode interval
retry_type exponential_backoff
flush_thread_count 2
flush_interval 5s
retry_forever false
retry_max_interval 30
chunk_limit_size 2M
queue_limit_length 8
overflow_action block
</buffer>
</match>
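Apply the ConfigMap and eyeball the rendered configuration before deploying the DaemonSet; a minimal sketch, assuming the file name shown above:
kubectl apply -f fluentd-configmap.yaml
kubectl get configmap fluentd-config -n kube-logging -o yaml | head -n 20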
--- 2. Deploy fluentd
[root@k8s-master /manifests/daemonset]# cat fluentd.yaml
apiVersion: v1
kind: ServiceAccount
metadata:
name: fluentd
namespace: kube-logging
labels:
app: fluentd
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
name: fluentd
labels:
app: fluentd
rules:
- apiGroups:
- ""
resources:
- pods
- namespaces
verbs:
- get
- list
- watch
---
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: fluentd
roleRef:
kind: ClusterRole
name: fluentd
apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
name: fluentd
namespace: kube-logging
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: fluentd
namespace: kube-logging
labels:
app: fluentd
spec:
selector:
matchLabels:
app: fluentd
template:
metadata:
labels:
app: fluentd
spec:
serviceAccount: fluentd
serviceAccountName: fluentd
tolerations:
- key: node-role.kubernetes.io/master
effect: NoSchedule
containers:
- name: fluentd
## Pay attention to the image here; ideally use the same one I do
image: 192.168.0.77:32237/fluent/fluentd-kubernetes-daemonset:v1.14-debian-elasticsearch7-1
env:
- name: FLUENT_ELASTICSEARCH_HOST
value: "elasticsearch.kube-logging.svc.xingzhibang.top"
- name: FLUENT_ELASTICSEARCH_PORT
value: "9200"
- name: FLUENT_ELASTICSEARCH_SCHEME
value: "http"
- name: FLUENTD_SYSTEMD_CONF
value: disable
resources:
limits:
memory: 512Mi
requests:
cpu: 100m
memory: 200Mi
volumeMounts:
- name: varlog
mountPath: /var/log
- name: varlibdockercontainers
mountPath: /var/lib/docker/containers
readOnly: true
## Mount the configuration file
- name: fluentd-config
mountPath: /fluentd/etc
terminationGracePeriodSeconds: 30
volumes:
- name: varlog
hostPath:
path: /var/log
- name: varlibdockercontainers
hostPath:
path: /var/lib/docker/containers
- name: fluentd-config
configMap:
name: fluentd-config
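Finally, apply the DaemonSet and confirm that a fluentd Pod is running on every node and that indices start appearing in Elasticsearch; a sketch that reuses the port-forward from section 1.5:
kubectl apply -f fluentd.yaml
kubectl get daemonset fluentd -n kube-logging
kubectl get pods -n kube-logging -l app=fluentd -o wide
## After a minute or two, daily logstash-* indices should show up
curl http://localhost:9200/_cat/indices?v
## In Kibana, create an index pattern matching logstash-* to browse the collected logs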
If you'd like to learn more, feel free to check out my personal blog: