监控工具monit配置文件详解-monit.conf配置文件各项配置解释及关于linux下make过程中的configure/Makefile/-j参数

一、监控工具monit配置文件详解-monit.conf配置文件各项配置解释

monit.conf配置文件主要包括三个部分，

1.Global section 全局配置

2.Services 监控服务

3.Includes 包含文件

其中第2部分和第3部分可以说是一个功能：既可以把所有的服务罗列在Services中，也可以将每个服务写一个文件放到Includes所指的文件夹中。不过因为一般的监控配置内容并不需要很多，所以我推荐都写在这一个文件中；当然如果需要监控的对象太多，按类别分开、单独写成多个配置文件也完全可以。完整地看一下monit.conf配置文件的内容吧：

bash 复制代码

## Monit control file
#Monit配置文件中，关键字不区分大小写。 所有路径必须以/开头，即要求完整路径，

## Global section
###############################################################################
#监控检查的时间间隔
set daemon  30
#启动monit后延迟多少秒再进行监控
    with start delay 240

##默认日志为syslog（与下面的自定义文件二选一，不要同时启用）
# set logfile syslog
#也可以设置为自定义路径下的文件
set logfile /opt/modules/monit/monit.log
#设置pid文件的位置
set pidfile /var/run/monit.pid
#设置监控项目唯一ID的文件位置  默认在$HOME/.monit.id
set idfile /var/.monit.id  
#默认存储每个周期内monitoring状态的文件
set statefile /var/.monit.state

#一些阈值设置以及ssl设置，一般可忽略
## Set limits for various tests. The following example shows the default values:
##
# set limits {
#     programOutput:     512 B,      # check program's output truncate limit
#     sendExpectBuffer:  256 B,      # limit for send/expect protocol test
#     fileContentBuffer: 512 B,      # limit for file content test
#     httpContentBuffer: 1 MB,       # limit for HTTP content test
#     networkTimeout:    5 seconds   # timeout for network I/O
#     programTimeout:    300 seconds # timeout for check program
#     stopTimeout:       30 seconds  # timeout for service stop
#     startTimeout:      30 seconds  # timeout for service start
#     restartTimeout:    30 seconds  # timeout for service restart
# }
# set ssl {
#     verify     : enable, # verify SSL certificates (disabled by default but STRONGLY RECOMMENDED)
#     selfsigned : allow   # allow self signed SSL certificates (reject by default)
# }


#报警邮件服务配置：主邮件服务器
         set mailserver   mail.bar.baz    
#备邮件服务器
               backup.bar.baz port 10025    

#当邮件服务器不可用时使用eventqueue存储报警事件
# set eventqueue
#     basedir /var/monit  # set the base directory where events will be stored
#     slots 100           # optionally limit the queue size

## Send status and events to M/Monit (for more informations about M/Monit
## see https://mmonit.com/). By default Monit registers credentials with
## M/Monit so M/Monit can smoothly communicate back to Monit and you don't
## have to register Monit credentials manually in M/Monit. It is possible to
## disable credential registration using the commented out option below.
## Though, if safety is a concern we recommend instead using https when
## communicating with M/Monit and send credentials encrypted. The password
## should be URL encoded if it contains URL-significant characters like
## ":", "?", "@". Default timeout is 5 seconds, you can customize it by
## adding the timeout option.
#
# set mmonit http://monit:monit@192.168.1.10:8080/collector
#     # with timeout 30 seconds              # Default timeout is 5 seconds
#     # and register without credentials     # Don't register credentials

#报警邮件的格式
# set mail-format {
#   from:    Monit <monit@$HOST>
#   subject: monit alert --  $EVENT $SERVICE
#   message: $EVENT Service $SERVICE
#                 Date:        $DATE
#                 Action:      $ACTION
#                 Host:        $HOST
#                 Description: $DESCRIPTION
#
#            Your faithful employee,
#            Monit
# }

## 后面可以重写此项配置，不用全部在上面修改
 set mail-format { from: monit@foo.bar }

#报警通知用户的配置，可以过滤或者设置给哪些邮箱通知
 set alert sysadm@foo.bar                       # receive all alerts
 set alert your-name@your.domain not on { instance, action }

#像很多服务一样自带一个httpd服务，可以使用界面化查看
set httpd port 2812 and
    use address localhost  # only accept connection from localhost
    allow localhost        # allow localhost to connect to the server and
    allow admin:monit      # require user 'admin' with password 'monit'
    #with ssl {            # enable SSL/TLS and set path to server certificate
    #    pemfile: /etc/ssl/certs/monit.pem
    #}

## Services 服务的配置
###############################################################################
#系统监控配置
  check system $HOST
    if loadavg (1min) > 4 then alert
    if loadavg (5min) > 2 then alert
    if cpu usage > 95% for 10 cycles then alert
    if memory usage > 75% then alert
    if swap usage > 25% then alert

#判断文件是否存在，判断文件权限，文件uid,gid
## Check if a file exists, checksum, permissions, uid and gid. In addition to alert recipients in the global section, customized alert can be sent to additional recipients by specifying a local alert handler. The service may be grouped using the GROUP option. More than one group can be specified by repeating the 'group name' statement.
  check file apache_bin with path /usr/local/apache/bin/httpd
    if failed checksum and
       expect the sum 8f7f419955cefa0b33a2ba316cba3659 then unmonitor
    if failed permission 755 then unmonitor
    if failed uid "root" then unmonitor
    if failed gid "root" then unmonitor
    alert security@foo.bar on {
           checksum, permission, uid, gid, unmonitor
        } with the mail-format { subject: Alarm! }
    group server

#根据pidfile监控进程
  check process apache with pidfile /usr/local/apache/logs/httpd.pid
    start program = "/etc/init.d/httpd start" with timeout 60 seconds
    stop program  = "/etc/init.d/httpd stop"
    if cpu > 60% for 2 cycles then alert
    if cpu > 80% for 5 cycles then restart
    if totalmem > 200.0 MB for 5 cycles then restart
    if children > 250 then restart
    if loadavg(5min) greater than 10 for 8 cycles then stop
    if disk read > 500 kb/s for 10 cycles then alert
    if disk write > 500 kb/s for 10 cycles then alert
    if failed host www.tildeslash.com port 80 protocol http and request "/somefile.html" then restart
    if failed port 443 protocol https with timeout 15 seconds then restart
    if 3 restarts within 5 cycles then unmonitor
    depends on apache_bin
    group server

#可以检查文件系统的读写权限，uid,gid等各种数据
## Check filesystem permissions, uid, gid, space usage, inode usage and disk I/O. Other services, such as databases, may depend on this resource and an automatically graceful stop may be cascaded to them before the filesystem will become full and data lost.
  check filesystem datafs with path /dev/sdb1
    start program  = "/bin/mount /data"
    stop program  = "/bin/umount /data"
    if failed permission 660 then unmonitor
    if failed uid "root" then unmonitor
    if failed gid "disk" then unmonitor
    if space usage > 80% for 5 times within 15 cycles then alert
    if space usage > 99% then stop
    if inode usage > 30000 then alert
    if inode usage > 99% then stop
    if read rate > 1 MB/s for 5 cycles then alert
    if read rate > 500 operations/s for 5 cycles then alert
    if write rate > 1 MB/s for 5 cycles then alert
    if write rate > 500 operations/s for 5 cycles then alert
    if service time > 10 milliseconds for 3 times within 5 cycles then alert
    group server

#可以监控文件的时间戳值
## Check a file's timestamp. In this example, we test if a file is older than 15 minutes and assume something is wrong if its not updated. Also, if the file size exceed a given limit, execute a script
  check file database with path /data/mydatabase.db
    if failed permission 700 then alert
    if failed uid "data" then alert
    if failed gid "data" then alert
    if timestamp > 15 minutes then alert
    if size > 100 MB then exec "/my/cleanup/script" as uid dba and gid dba

#可以监控目录的权限,uid,gid
## Check directory permission, uid and gid.  An event is triggered if the directory does not belong to the user with uid 0 and gid 0.  In addition, the permissions have to match the octal description of 755 (see chmod(1)).
  check directory bin with path /bin
    if failed permission 755 then unmonitor
    if failed uid 0 then unmonitor
    if failed gid 0 then unmonitor

#可以监控远程主机，ping主机以及监控其端口
## Check a remote host availability by issuing a ping test and check the content of a response from a web server. Up to three pings are sent and connection to a port and an application level network check is performed.
  check host myserver with address 192.168.1.1
    if failed ping then alert
    if failed port 3306 protocol mysql with timeout 15 seconds then alert
    if failed port 80 protocol http
       and request /some/path with content = "a string"
    then alert

#可以监控网卡
## Check a network link status (up/down), link capacity changes, saturation and bandwidth usage.
  check network public with interface eth0
    if failed link then alert
    if changed link then alert
    if saturation > 90% then alert
    if download > 10 MB/s then alert
    if total uploaded > 1 GB in last hour then alert

#可以监控自定义的脚本程序
  check program myscript with path /usr/local/bin/myscript.sh
   if status != 0 then alert

## Includes 包含文件部分
###############################################################################
#指定包含目录即可。
# set daemon 30   # 检查间隔已在 Global section 中设置，这里无需重复
include /etc/monit.d/*

认真看了这个文档，才发现monit可以通过脚本路径直接监控自己写的一些shell脚本的执行进程，这个挺好。另外从文档上来看，monit可以做的事情有很多，除了监控本机的进程，文件，网卡，系统数据外，还可以监控外部服务器的各个端口响应；除了通过端口监控，还可以通过PID文件，文件时间戳值，文件权限等角度来监控。感觉比supervisord更好用，我突然想到一个很合适的比喻来比较supervisord和monit的差别，supervisord就像开发商交给你的毛坯房，有个房子的样子，但里面什么也没有，都要自己去实现。而monit则像是精装房，什么都有了，你只要拎包入住即可。

二、关于linux下make过程中的configure/Makefile/-j参数

1. Linux下config/configure/Configure等命令

config/configure/Configure、make 、make test/make check、sudo make install 的作用

config/configure/Configure

这个是用来检测你的安装平台的目标特征的，比如它会检测你是不是有CC或GCC。configure本身并不需要CC或GCC才能运行，它是个shell脚本。

这一步一般用来生成 Makefile，为下一步的编译做准备，你可以通过在 configure 后加上参数来对安装进行控制，比如:

./configure --prefix=/usr

上面的意思是将该软件安装在 /usr 下面

执行文件就会安装在 /usr/bin （而不是默认的 /usr/local/bin)

资源文件就会安装在 /usr/share（而不是默认的/usr/local/share）

同时一些软件的配置文件的安装位置，你可以通过指定 --sysconfdir= 参数进行设定（例如 --sysconfdir=/etc）

还有诸如：--with、--enable、--without、--disable 等等参数对编译加以控制，你可以通过 ./configure --help 察看详细的说明帮助

make

这一步是用来编译的，它从Makefile中读取指令，然后编译，大多数的源代码包都经过这一步进行编译，当然有些perl或python编写的软件需要调用perl或python来进行编译。

如果在 make 过程中出现 error ，你就要记下错误代码（注意不仅仅是最后一行），然后你可以向开发者提交 bugreport（一般在 INSTALL 里有提交地址），或者你的系统少了一些依赖库等，这些需要自己仔细研究错误代码和错误提示内容。

make test / make check

顾名思义，这一步就是对上一步 make 的检查了：它会运行软件包自带的测试，确保 make 编译出来的结果是没有错误的，也就是这一步的 test、check 要全部是 OK 的，error 为0。在 make install 之前先执行 make test/check，可以尽早发现编译结果中的问题，以避免安装之后才因为报错功亏一篑。

sudo make install

这一步是用来安装的，它也从Makefile中读取指令，把编译好的文件安装到指定的位置。执行这条命令一般需要你有 root 权限（因为要向系统写入文件），所以前面用了 sudo

2. 关于Makefile，什么是makefile

makefile定义了一系列的规则来指定，哪些文件需要先编译，哪些文件需要后编译，哪些文件需要重新编译，甚至于进行更复杂的功能操作，因为 makefile就像一个Shell脚本一样，其中也可以执行操作系统的命令。

makefile带来的好处就是--"自动化编译",一旦写好，只需要一个make命令，整个工程完全自动编译，极大的提高了软件开发的效率。make是一个命令工具，是一个解释makefile中指令的命令工具，一般来说，大多数的IDE都有这个命令，比如：Delphi的make,Visual C++的nmake,Linux下GNU的make.可见，makefile都成为了一种在工程方面的编译方法。

make： 根据Makefile文件编译源代码、连接、生成目标文件、可执行文件。
make clean： 清除上次的make命令所产生的object文件（后缀为".o"的文件）及可执行文件。
make install： 将编译成功的可执行文件安装到系统目录中，一般为/usr/local/bin目录。
make dist： 产生发布软件包文件（即distribution package）。这个命令会将源代码及相关文件打包成一个tar.gz压缩文件，用来作为发布软件的软件包。它会在当前目录下生成一个名字类似"PACKAGE-VERSION.tar.gz"的文件。PACKAGE和VERSION，是我们在configure.in中定义的AM_INIT_AUTOMAKE(PACKAGE, VERSION)。

make distcheck： 生成发布软件包并对其进行测试检查，以确定发布包的正确性。这个操作将自动把压缩包文件解开，然后执行configure命令，并且执行make，来确认编译不出现错误，最后提示你软件包已经准备好，可以发布了。

make distclean： 类似make clean，但同时也将configure生成的文件全部删除掉，包括Makefile文件。

3. 关于make -j2或者make -j4

执行make的时候，有时会看到有执行make -j2或者make -j4之类的选项，make -j4是什么意思呢？

j就是jobs的意思，make(gmake,gnumake)的-j参数，是一个利用多核、多线程加速编译过程的功能参数，可以指定同时并行执行多少个编译任务（job）。如果服务器是多核CPU，建议j后面的参数指定大点，比如-j2/-j4/-j8。但也并不是完全越大越好，从我使用经验来看，多核的服务器如果加上-j2、-j4参数会明显感觉到速度会快很多，但加到超过CPU个数的一半时发现并不会有多少提升。总之多核服务器建议加上-j参数，-j2、-j4、-j8都可以，不要超过CPU个数的一半。