docker 部署tig监控服务

前言

  1. tig对应的服务是influxdb grafana telegraf
  2. 此架构比传统的promethus架构更为简洁,虽然influxdb开源方案没有集群部署,但是对于中小型服务监控需求该方案简单高效
  3. 本文以docker-compose来演示这套监控体系的快速搭建和效果。

部署

docker-compose.yaml

yaml 复制代码
version: '3'
networks:
  monitor:
    driver: bridge
#配置应用
services:
  #grafana 报警推送 
  #账号密码 prometheusalert prometheusalert
  prometheusalert:
    image: feiyu563/prometheus-alert
    container_name:   prometheusalert
    hostname:   prometheusalert
    restart: always
    ports:
      - 8087:8080
    networks:
      - monitor
    volumes:
      - ./docker/prometheusalert/conf:/app/conf
      - ./docker/prometheusalert/db:/app/db
    environment:
      - PA_LOGIN_USER=prometheusalert
      - PA_LOGIN_PASSWORD=prometheusalert
      - PA_TITLE=PrometheusAlert
      - PA_OPEN_FEISHU=1
  #界面展示 默认账号密码 admin admin
  grafana:
    image: grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    volumes:
      - ./docker/grafana/data/grafana:/var/lib/grafana
    ports:
      - "3000:3000"
    networks:
      - monitor
     
  #influxdb数据库v2自带管理端
  #账号密码 root root
  influxdb:
    image: influxdb
    container_name: influxdb
    environment:
      INFLUX_DB: test                   # 可能无效
      INFLUXDB_USER: root               # 可能无效
      INFLUXDB_USER_PASSWORD: root      # 可能无效
    ports:
      - "8086:8086"
    restart: always
    volumes:
      - ./docker/influxdb/:/var/lib/influxdb
    networks:
      - monitor
  #indluxdb数据库v1
  #influxdb1x: 
  #  image: influxdb:1.8
  #  container_name: influxdb1.8
  #  environment:
  #    INFLUXDB_DB: test
  #    INFLUXDB_ADMIN_ENABLED: true
  #    INFLUXDB_ADMIN_USER: root
  #    INFLUXDB_ADMIN_PASSWORD: root
  #  ports:
  #    - "8098:8086"
  #  restart: always
  #  volumes:
  #    - ./docker/influxdb1x/influxdb1x.conf:/etc/influxdb/influxdb.conf
  #    - ./docker/influxdb1x/:/var/lib/influxdb
  #  networks:
  #    - monitor

telegraf 安装 官方文档

# telegraf是采集端,部署于监控数据的源头,详细的部署教程可以通过官网,下面以linux服务器为例子
# 编写源
cat <<EOF | sudo tee /etc/yum.repos.d/influxdb.repo
[influxdb]
name = InfluxData Repository - Stable
baseurl = https://repos.influxdata.com/stable/\$basearch/main
enabled = 1
gpgcheck = 1
gpgkey = https://repos.influxdata.com/influxdata-archive_compat.key
EOF

# 安装
sudo yum install telegraf

# 校验
telegraf --help 

使用

  1. 登陆influxdb http://localhost:8086.
    • 首次登陆会创建账号
    • org是分区意思
    • buk是库的概念

  2. 配置telegraf采集etl流程 官方telegraf采集插件介绍
    • 配置influxdb数据的访问的token
    • 配置telegraf采集的配置文件
    • 采集端启动telegraf采集etl
      a. 配置tokenb. 可以通过平台生成配置文件,也可以自己保存配置文件。平台生成配置则提供http接口远程提供配置下载
      以nginx文件为例提供配置
      telegraf.conf
yaml 复制代码
# Configuration for telegraf agent 
# telegraf 采集端配置都是默认配置
[agent]
  ## Default data collection interval for all inputs
  interval = "10s"
  ## Rounds collection interval to 'interval'
  ## ie, if interval="10s" then always collect on :00, :10, :20, etc.
  round_interval = true

  ## Telegraf will send metrics to outputs in batches of at most
  ## metric_batch_size metrics.
  ## This controls the size of writes that Telegraf sends to output plugins.
  metric_batch_size = 1000

  ## Maximum number of unwritten metrics per output.  Increasing this value
  ## allows for longer periods of output downtime without dropping metrics at the
  ## cost of higher maximum memory usage.
  metric_buffer_limit = 10000

  ## Collection jitter is used to jitter the collection by a random amount.
  ## Each plugin will sleep for a random time within jitter before collecting.
  ## This can be used to avoid many plugins querying things like sysfs at the
  ## same time, which can have a measurable effect on the system.
  collection_jitter = "0s"

  ## Default flushing interval for all outputs. Maximum flush_interval will be
  ## flush_interval + flush_jitter
  flush_interval = "10s"
  ## Jitter the flush interval by a random amount. This is primarily to avoid
  ## large write spikes for users running a large number of telegraf instances.
  ## ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
  flush_jitter = "0s"

  ## By default or when set to "0s", precision will be set to the same
  ## timestamp order as the collection interval, with the maximum being 1s.
  ##   ie, when interval = "10s", precision will be "1s"
  ##       when interval = "250ms", precision will be "1ms"
  ## Precision will NOT be used for service inputs. It is up to each individual
  ## service input to set the timestamp at the appropriate precision.
  ## Valid time units are "ns", "us" (or "µs"), "ms", "s".
  precision = ""

  ## Log at debug level.
  # debug = false
  ## Log only error level messages.
  # quiet = false

  ## Log target controls the destination for logs and can be one of "file",
  ## "stderr" or, on Windows, "eventlog".  When set to "file", the output file
  ## is determined by the "logfile" setting.
  # logtarget = "file"

  ## Name of the file to be logged to when using the "file" logtarget.  If set to
  ## the empty string then logs are written to stderr.
  # logfile = ""

  ## The logfile will be rotated after the time interval specified.  When set
  ## to 0 no time based rotation is performed.  Logs are rotated only when
  ## written to, if there is no log activity rotation may be delayed.
  # logfile_rotation_interval = "0d"

  ## The logfile will be rotated when it becomes larger than the specified
  ## size.  When set to 0 no size based rotation is performed.
  # logfile_rotation_max_size = "0MB"

  ## Maximum number of rotated archives to keep, any older logs are deleted.
  ## If set to -1, no archives are removed.
  # logfile_rotation_max_archives = 5

  ## Pick a timezone to use when logging or type 'local' for local time.
  ## Example: America/Chicago
  # log_with_timezone = ""

  ## Override default hostname, if empty use os.Hostname()
  hostname = ""
  ## If set to true, do no set the "host" tag in the telegraf agent.
  omit_hostname = false

# influxdb_v2 输出插件配置 这里需要配置的
# 数据库地址:urls 分区:organization 库:bucket 授权token:token
[[outputs.influxdb_v2]]
  ## The URLs of the InfluxDB cluster nodes.
  ##
  ## Multiple URLs can be specified for a single cluster, only ONE of the
  ## urls will be written to each interval.
  ##   ex: urls = ["https://us-west-2-1.aws.cloud2.influxdata.com"]
  urls = ["http://localhost:8086"] 

  ## Token for authentication.
  token = "上一步创建的token"

  ## Organization is the name of the organization you wish to write to; must exist.
  organization = "创建的分区 这里是test"

  ## Destination bucket to write into.
  bucket = "创建的表 这里是test"

  ## The value of this tag will be used to determine the bucket.  If this
  ## tag is not set the 'bucket' option is used as the default.
  # bucket_tag = ""

  ## If true, the bucket tag will not be added to the metric.
  # exclude_bucket_tag = false

  ## Timeout for HTTP messages.
  # timeout = "5s"

  ## Additional HTTP headers
  # http_headers = {"X-Special-Header" = "Special-Value"}

  ## HTTP Proxy override, if unset values the standard proxy environment
  ## variables are consulted to determine which proxy, if any, should be used.
  # http_proxy = "http://corporate.proxy:3128"

  ## HTTP User-Agent
  # user_agent = "telegraf"

  ## Content-Encoding for write request body, can be set to "gzip" to
  ## compress body or "identity" to apply no encoding.
  # content_encoding = "gzip"

  ## Enable or disable uint support for writing uints influxdb 2.0.
  # influx_uint_support = false

  ## Optional TLS Config for use on HTTP connections.
  # tls_ca = "/etc/telegraf/ca.pem"
  # tls_cert = "/etc/telegraf/cert.pem"
  # tls_key = "/etc/telegraf/key.pem"
  ## Use TLS but skip chain & host verification
  # insecure_skip_verify = false

# Parse the new lines appended to a file
# tail 输入插件配置,以监听nginx日志为例 需要配置
# 监听文件位置 files 
# nginx行数据解析表达式 grok_patterns 提取监控字段,gork表达式不单独说明了
# nginx监控数据存储表名 name_override
[[inputs.tail]]
  ## File names or a pattern to tail.
  ## These accept standard unix glob matching rules, but with the addition of
  ## ** as a "super asterisk". ie:
  ##   "/var/log/**.log"  -> recursively find all .log files in /var/log
  ##   "/var/log/*/*.log" -> find all .log files with a parent dir in /var/log
  ##   "/var/log/apache.log" -> just tail the apache log file
  ##   "/var/log/log[!1-2]*  -> tail files without 1-2
  ##   "/var/log/log[^1-2]*  -> identical behavior as above
  ## See https://github.com/gobwas/glob for more examples
  ##
  files = ["/logs/nginx/access_main.log"]

  ## Read file from beginning.
  #from_beginning = false

  ## Whether file is a named pipe
  # pipe = false

  ## Method used to watch for file updates.  Can be either "inotify" or "poll".
  # watch_method = "inotify"

  ## Maximum lines of the file to process that have not yet be written by the
  ## output.  For best throughput set based on the number of metrics on each
  ## line and the size of the output's metric_batch_size.
  # max_undelivered_lines = 1000

  ## Character encoding to use when interpreting the file contents.  Invalid
  ## characters are replaced using the unicode replacement character.  When set
  ## to the empty string the data is not decoded to text.
  ##   ex: character_encoding = "utf-8"
  ##       character_encoding = "utf-16le"
  ##       character_encoding = "utf-16be"
  ##       character_encoding = ""
  # character_encoding = ""

  ## Data format to consume.
  ## Each data format has its own unique set of configuration options, read
  ## more about them here:
  ## https://github.com/influxdata/telegraf/blob/master/docs/DATA_FORMATS_INPUT.md
  grok_patterns = ["%{NGINX_ACCESS_LOG}"]
  name_override = "nginx_access_log"
  #grok_custom_pattern_files = []
  #grok_custom_patterns = '''
  #NGINX_ACCESS_LOG %{IP:remote_addr} - (-|%{WORD:remote_user}) [%{HTTPDATE:time_local}] %{BASE10NUM:request_time:float} (-|%{BASE10NUM:upstream_response_time:float}) %{IPORHOST:host} %{QS:request} %{NUMBER:status:int} %{NUMBER:body_bytes_sent:int} %{QS:referrer} %{QS:agent} %{IPORHOST:xforwardedfor}
  #'''
  grok_custom_patterns = '''
  NGINX_ACCESS_LOG %{IP:remote_addr} - (-|%{WORD:remote_user:drop}) \[%{HTTPDATE:ts:ts}\] %{BASE10NUM:request_time:float} %{BASE10NUM:upstream_response_time:float} %{IPORHOST:host:tag} "(?:%{WORD:verb:drop} %{NOTSPACE:request:tag}(?: HTTP/%{NUMBER:http_version:drop})?|%{DATA:rawrequest})" %{NUMBER:status:tag} (?:%{NUMBER:resp_bytes}|-)  %{QS:referrer:drop} %{QS:agent:drop} %{QS:xforwardedfor:drop}
  '''

  grok_timezone = "Local"
  data_format = "grok"

  ## Set the tag that will contain the path of the tailed file. If you don't want this tag, set it to an empty string.
  # path_tag = "path"

  ## multiline parser/codec
  ## https://www.elastic.co/guide/en/logstash/2.4/plugins-filters-multiline.html
  #[inputs.tail.multiline]
    ## The pattern should be a regexp which matches what you believe to be an
	## indicator that the field is part of an event consisting of multiple lines of log data.
    #pattern = "^\s"

    ## This field must be either "previous" or "next".
	## If a line matches the pattern, "previous" indicates that it belongs to the previous line,
	## whereas "next" indicates that the line belongs to the next one.
    #match_which_line = "previous"

    ## The invert_match field can be true or false (defaults to false).
    ## If true, a message not matching the pattern will constitute a match of the multiline
	## filter and the what will be applied. (vice-versa is also true)
    #invert_match = false

    ## After the specified timeout, this plugin sends a multiline event even if no new pattern
	## is found to start a new event. The default timeout is 5s.
    #timeout = 5s

gork表达式举例

conf 复制代码
# nginx 日志格式
'$remote_addr - $remote_user [$time_local] $request_time $upstream_response_time $host "$request" '
                      '$status $body_bytes_sent  "$http_referer" '
                      '"$http_user_agent" "$http_x_forwarded_for"'
 # grok  解析格式              
  NGINX_ACCESS_LOG %{IP:remote_addr} - (-|%{WORD:remote_user:drop}) \[%{HTTPDATE:ts:ts}\] %{BASE10NUM:request_time:float} %{BASE10NUM:upstream_response_time:float} %{IPORHOST:host:tag} "(?:%{WORD:verb:drop} %{NOTSPACE:request:tag}(?: HTTP/%{NUMBER:http_version:drop})?|%{DATA:rawrequest})" %{NUMBER:status:tag} (?:%{NUMBER:resp_bytes}|-)  %{QS:referrer:drop} %{QS:agent:drop} %{QS:xforwardedfor:drop}

# nginx 日志举例
1.1.1.2 - - [30/Jan/2023:02:27:24 +0000] 0.075 0.075 xxx.xxx.xxx "POST /api/xxx/xxx/xxx HTTP/1.1" 200 69  "https://xxx.xxx.xxx/" "Mozilla/5.0 (iPhone; CPU iPhone OS 16_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148" "1.1.1.1"

# grok 解析变量如下再取变量生成influxdb行协议
{
  "NGINX_ACCESS_LOG": [
    [
      "1.1.1.2 - - [30/Jan/2023:02:27:24 +0000] 0.075 0.075 prod.webcomicsapp.com "POST /api/xxx/xxx/xxx HTTP/1.1" 200 69  "https://xxx.xxx.xxx/" "Mozilla/5.0 (iPhone; CPU iPhone OS 16_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148" "1.46.138.190""
    ]
  ],
  "remote_addr": [
    [
      "1.1.1.2"
    ]
  ],
  "IPV6": [
    [
      null,
      null
    ]
  ],
  "IPV4": [
    [
      "1.1.1.2",
      null
    ]
  ],
  "remote_user": [
    [
      null
    ]
  ],
  "ts": [
    [
      "30/Jan/2023:02:27:24 +0000"
    ]
  ],
  "MONTHDAY": [
    [
      "30"
    ]
  ],
  "MONTH": [
    [
      "Jan"
    ]
  ],
  "YEAR": [
    [
      "2023"
    ]
  ],
  "TIME": [
    [
      "02:27:24"
    ]
  ],
  "HOUR": [
    [
      "02"
    ]
  ],
  "MINUTE": [
    [
      "27"
    ]
  ],
  "SECOND": [
    [
      "24"
    ]
  ],
  "INT": [
    [
      "+0000"
    ]
  ],
  "request_time": [
    [
      "0.075"
    ]
  ],
  "upstream_response_time": [
    [
      "0.075"
    ]
  ],
  "host": [
    [
      "xxx.xxx.xxx"
    ]
  ],
  "HOSTNAME": [
    [
      "xxx.xxx.xxx"
    ]
  ],
  "IP": [
    [
      null
    ]
  ],
  "verb": [
    [
      "POST"
    ]
  ],
  "request": [
    [
      "/api/xxx/xxx/xxx"
    ]
  ],
  "http_version": [
    [
      "1.1"
    ]
  ],
  "BASE10NUM": [
    [
      "1.1",
      "200",
      "69"
    ]
  ],
  "rawrequest": [
    [
      null
    ]
  ],
  "status": [
    [
      "200"
    ]
  ],
  "resp_bytes": [
    [
      "69"
    ]
  ],
  "referrer": [
    [
      ""https://xxx.xxx.xxx/""
    ]
  ],
  "QUOTEDSTRING": [
    [
      ""https://xxx.xxx.xxx/"",
      ""Mozilla/5.0 (iPhone; CPU iPhone OS 16_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148"",
      ""1.1.1.1""
    ]
  ],
  "agent": [
    [
      ""Mozilla/5.0 (iPhone; CPU iPhone OS 16_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148""
    ]
  ],
  "xforwardedfor": [
    [
      ""1.1.1.1""
    ]
  ]
}

启动telegraf

bash 复制代码
# 测试启动
nohup  telegraf --config telegraf.conf --debug
# 退出测试
ctl+c
# 后台进程开启
nohup  telegraf --config telegraf.conf >/dev/null 2>&1 &
# 关闭后台进程
ps -aux | grep telegraf
kill -9 '对应pid'

3.配置granfa

a. 登陆granfa http://localhost:3000/login admin admin。首次登陆需要改密码

b.添加数据源


至此tig流转全部完成

相关推荐
Libby博仙12 分钟前
docker 改了镜像源为阿里云,还是下载失败
阿里云·docker·容器
百锦再1 小时前
在Linux上创建一个Docker容器并在其中执行Python脚本
linux·python·docker
钗头风1 小时前
3.Docker常用命令
运维·docker·容器
圣心4 小时前
Ollama Docker 镜像部署
运维·docker·容器
转身後 默落4 小时前
06.Docker 镜像制作和管理
java·docker·容器
_院长大人_14 小时前
Docker Mysql 数据迁移
mysql·adb·docker
drebander18 小时前
Docker 与 Kubernetes(K8s)初探
docker
ccmjga1 天前
升级 SpringBoot3 全项目讲解 — Spring Boot 3 中如何发Http请求?
java·spring boot·后端·docker·gradle·springboot·spring security
曹天骄1 天前
Docker Compose Installation on CentOS 7.9
docker·eureka·centos
wgslucky1 天前
docker 安装jenkins
docker·容器·jenkins