1、部署node-exporter
1)helm方式部署(适用于Kubernetes集群内的节点)
bash
root@iZj6c72dzbei17o2cuksmeZ:~# helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
"prometheus-community" has been added to your repositories
root@iZj6c72dzbei17o2cuksmeZ:~# helm repo update
Hang tight while we grab the latest from your chart repositories...
...Successfully got an update from the "prometheus-community" chart repository
Update Complete. ⎈Happy Helming!⎈
root@iZj6c72dzbei17o2cuksmeZ:~# helm install prometheus-node-exporter prometheus-community/prometheus-node-exporter
NAME: prometheus-node-exporter
LAST DEPLOYED: Sun Apr 27 10:56:21 2025
NAMESPACE: default
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
1. Get the application URL by running these commands:
export POD_NAME=$(kubectl get pods --namespace default -l "app.kubernetes.io/name=prometheus-node-exporter,app.kubernetes.io/instance=prometheus-node-exporter" -o jsonpath="{.items[0].metadata.name}")
echo "Visit http://127.0.0.1:9100 to use your application"
kubectl port-forward --namespace default $POD_NAME 9100
2)docker-compose方式部署(适用于Kubernetes集群外的主机)
yaml
root@iZj6c2vhsafoay7j7vyy89Z:~# cat /data/docker-compose/node-exporter/docker-compose.yaml
version: '3.8'
services:
node-exporter:
image: prom/node-exporter:latest
container_name: node-exporter
restart: unless-stopped
ports:
- "9100:9100"
network_mode: "host" # 使用宿主机网络,方便采集真实的主机数据
pid: "host" # 让容器可以访问宿主机的 /proc
volumes:
- /proc:/host/proc:ro
- /sys:/host/sys:ro
- /:/rootfs:ro
command:
- '--path.procfs=/host/proc'
- '--path.sysfs=/host/sys'
- '--path.rootfs=/rootfs'
- '--collector.filesystem.ignored-mount-points=^/(sys|proc|dev|host|etc)($$|/)'
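部署(启动容器)
注意:上面的配置同时设置了 ports 和 network_mode: "host"。在 host 网络模式下端口映射不会生效(部分 Docker/Compose 版本还会直接报错),node-exporter 会直接监听宿主机的 9100 端口,因此 ports 一段可以删除。另外,--collector.filesystem.ignored-mount-points 参数在较新版本的 node-exporter 中已被废弃,使用 latest 镜像时建议改用 --collector.filesystem.mount-points-exclude。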
bash
root@iZj6c2vhsafoay7j7vyy89Z:/data/docker-compose/node-exporter# docker-compose up -d
Pulling node-exporter (prom/node-exporter:latest)...
latest: Pulling from prom/node-exporter
9fa9226be034: Pull complete
1617e25568b2: Pull complete
c6e37428e3b3: Pull complete
Digest: sha256:d00a542e409ee618a4edc67da14dd48c5da66726bbd5537ab2af9c1dfc442c8a
Status: Downloaded newer image for prom/node-exporter:latest
Creating node-exporter ... done
2、部署Endpoints、Service、ServiceMonitor(把集群外主机上的node-exporter接入集群内的Prometheus)
Endpoints的yaml文件(addresses中的ip填写运行node-exporter的外部主机地址,端口为9100)
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# cat zjj-endpoints.yaml
yaml
apiVersion: v1
kind: Endpoints
metadata:
annotations: {}
labels:
app: zjj
jobname: node-exporter
source: external
name: zjj
namespace: monitoring
subsets:
- addresses:
- hostname: zjj
ip: 172.16.0.20
ports:
- name: metrics
port: 9100
protocol: TCP
Service的yaml文件(不设置selector,名称和命名空间与上面的Endpoints保持一致,二者才会自动关联;resourceVersion是从集群导出时带出的字段,新建资源前建议删除)
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# cat zjj-nodeporter-service.yaml
yaml
---
apiVersion: v1
kind: Service
metadata:
annotations: {}
labels:
app: zjj
jobname: node-exporter
source: external
name: zjj
namespace: monitoring
resourceVersion: '552219945'
spec:
internalTrafficPolicy: Cluster
ipFamilies:
- IPv4
ipFamilyPolicy: SingleStack
ports:
- name: metrics
port: 9100
protocol: TCP
targetPort: 9100
sessionAffinity: None
type: ClusterIP
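ServiceMonitor的yaml文件(通过selector匹配上面Service的标签;jobLabel: jobname表示取Service上jobname标签的值作为job名,即node-exporter,后面的告警规则正是按这个job名匹配的)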
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# cat zjj-servicemonitor.yaml
yaml
---
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
annotations: {}
labels:
app: zjj
release: kube-prometheus
source: external
name: zjj
namespace: monitoring
resourceVersion: '294048548'
spec:
endpoints:
- port: metrics
jobLabel: jobname
namespaceSelector:
matchNames:
- monitoring
selector:
matchLabels:
app: zjj
source: external
创建对应的资源
bash
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# kubectl apply -f zjj-endpoints.yaml
endpoints/zjj created
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# kubectl apply -f zjj-nodeporter-service.yaml
service/zjj created
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# kubectl apply -f zjj-servicemonitor.yaml
servicemonitor.monitoring.coreos.com/zjj created
查看资源是否创建成功
bash
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# kubectl get endpoints zjj -n monitoring
NAME ENDPOINTS AGE
zjj 172.16.0.20:9100 55m
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# kubectl get service zjj -n monitoring
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
zjj ClusterIP 10.68.140.25 <none> 9100/TCP 54m
root@iZj6c72dzbei17o2cuksmeZ:~/yaml# kubectl get servicemonitor zjj -n monitoring
NAME AGE
zjj 55m
3、创建告警规则(PrometheusRule,触发后的告警由Alertmanager负责发送)
root@iZj6c72dzbei17o2cuksmeZ:~/kube-prometheus-0.14.0# cat manifests/zjj-nodeExporter-prometheusRule.yaml
yaml
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
labels:
app.kubernetes.io/component: nodeInstances
app.kubernetes.io/name: NodeInstances
app.kubernetes.io/part-of: kube-prometheus
app.kubernetes.io/version: 1.8.2
prometheus: k8s
role: alert-rules
name: node-instances-rules
namespace: monitoring
spec:
groups:
- name: zjj-rules
rules:
- alert: NodeInstancesDown
annotations:
description: Failed to scrape {{ $labels.job }} on {{ $labels.instance }} for
more than 1 minutes. Node seems down.
title: Node {{ $labels.instance }} is down
expr: up{job="node-exporter"} == 0
for: 1m
labels:
severity: critical
应用新的规则
bash
root@iZj6c72dzbei17o2cuksmeZ:~/kube-prometheus-0.14.0# kubectl apply -f manifests/zjj-nodeExporter-prometheusRule.yaml