1、准备安装包apache-flume-1.11.0-bin.tar.gz;
上传;
data:image/s3,"s3://crabby-images/cf874/cf8746d082f33ffd587d19e81a54b59fce8f7d49" alt=""
data:image/s3,"s3://crabby-images/d1bfc/d1bfcda06003cfbfe08313a9701a3322e4385b12" alt=""
2、安装flume-1.11.0;
解压;
bash
tar -zxvf apache-flume-1.11.0-bin.tar.gz -C /opt/server
data:image/s3,"s3://crabby-images/50e7c/50e7cda9b093099d591b66674937955066919b06" alt=""
data:image/s3,"s3://crabby-images/60a7e/60a7e4662991d9b842ec4fde7c712dd8ad4fd4db" alt=""
进入conf目录,修改flume-env.sh,配置JAVA_HOME;
bash
cd /opt/server/apache-flume-1.11.0-bin/conf
# 先复制一份flume-env.sh.template文件
cp flume-env.sh.template flume-env.sh
# 修改
vim flume-env.sh
export JAVA_HOME=/opt/server/jdk1.8.0_221
data:image/s3,"s3://crabby-images/0cf8c/0cf8c9b4306192827d04983ac82219c4cef21e16" alt=""
data:image/s3,"s3://crabby-images/16563/1656326332e2a6a325ab9bf8ac2c4c76c6abf943" alt=""
3、flume采集nginx的日志数据,保存到hdfs;
安装nginx;
bash
yum install epel-release
yum update
yum -y install nginx
data:image/s3,"s3://crabby-images/38d2e/38d2e406b9547445ab6c386eda2b08f429f2b14e" alt=""
data:image/s3,"s3://crabby-images/4549c/4549c6657ccb5d2134260d43025bf1862744c986" alt=""
"yum update"命令最后报错了,但好像没有影响nginx的安装,估计是版本兼容性问题,如下图;
data:image/s3,"s3://crabby-images/b37cd/b37cdf958a763016c325cc7a18ccafa7f5b5b510" alt=""
data:image/s3,"s3://crabby-images/8744f/8744fc223410dcf6eeb8877c4562ae47f752dfe6" alt=""
data:image/s3,"s3://crabby-images/0176c/0176ca0bf26ddef14b045f1c37434fc2ca1ade64" alt=""
nginx命令;
bash
systemctl start nginx #开启nginx服务
systemctl stop nginx #停止nginx服务
systemctl restart nginx #重启nginx服务
启动nginx后,访问80端口;
data:image/s3,"s3://crabby-images/10238/102384bd1b613a6406b3d1d5aebbfb3a1bfb6a57" alt=""
data:image/s3,"s3://crabby-images/6b106/6b1065d724e478aaa8938ac12e065b4f54c5a3c0" alt=""
nginx网络80端口访问日志文件保存位置;
bash
cd /var/log/nginx
data:image/s3,"s3://crabby-images/d4e51/d4e51f33228569ca0c1116e32cb48e625be12edd" alt=""
4、flume-1.9之后版本整合hadoop3.x版本;
注意:网上说"在hadoop3.x之前需要将flume的lib 文件夹下的 guava-11.0.2.jar 删除,否则会报错,Hadoop 3.1.0之后无需删除,是兼容的,flume1.9",本次没有删除。
"/opt/server/apache-flume-1.11.0-bin/lib/guava-11.0.2.jar"
data:image/s3,"s3://crabby-images/feb12/feb12f6f7f027e825f54742c6b735b6a3f9bacf0" alt=""
拷贝hadoop3.x里相关的jar包到flume-1.11.0的lib目录;
bash
cp /opt/server/hadoop-3.3.1/share/hadoop/common/*.jar /opt/server/apache-flume-1.11.0-bin/lib
cp /opt/server/hadoop-3.3.1/share/hadoop/common/lib/*.jar /opt/server/apache-flume-1.11.0-bin/lib
cp /opt/server/hadoop-3.3.1/share/hadoop/hdfs/*.jar /opt/server/apache-flume-1.11.0-bin/lib
data:image/s3,"s3://crabby-images/8e4ab/8e4ab63080daecf129c4813828dacb3d054b5518" alt=""
data:image/s3,"s3://crabby-images/4c406/4c406f14d81286e31b3dbe1a1938b925031a29d7" alt=""
data:image/s3,"s3://crabby-images/60579/60579cd2d6231bb869b6d019de2ad8e1048490ee" alt=""
5、flume采集nginx日志,保存到hdfs;
在目录"/opt/server/apache-flume-1.11.0-bin/conf/"创建配置文件taildir-hdfs.conf,并且编辑内容;
data:image/s3,"s3://crabby-images/315c4/315c4a9abc2686c51899713a0cba08d40f073ca2" alt=""
taildir-hdfs.conf;
bash
a3.sources = r3
a3.sinks = k3
a3.channels = c3
# Describe/configure the source
a3.sources.r3.type = TAILDIR
a3.sources.r3.filegroups = f1
# 此处支持正则
a3.sources.r3.filegroups.f1 = /var/log/nginx/access.log
# 用于记录文件读取的位置信息
a3.sources.r3.positionFile = /opt/server/apache-flume-1.11.0-bin/tail_dir.json
# Describe the sink
a3.sinks.k3.type = hdfs
a3.sinks.k3.hdfs.path = hdfs://server:8020/user/tailDir
a3.sinks.k3.hdfs.fileType = DataStream
# 设置每个文件的滚动大小大概是 128M,默认值:1024,当临时文件达到该大小(单位:bytes)时,滚动成目标文件。如果设置成0,则表示不根据临时文件大小来滚动文件。
a3.sinks.k3.hdfs.rollSize = 134217700
# 默认值:10,当events数据达到该数量时候,将临时文件滚动成目标文件,如果设置成0,则表示不根据events数据来滚动文件。
a3.sinks.k3.hdfs.rollCount = 0
# 每隔60秒滚动一次文件(单位:秒),默认值:30;如果设置成0,则表示不根据时间来滚动文件。
a3.sinks.k3.hdfs.rollInterval = 60
# flume检测到hdfs在复制块时会自动滚动文件,导致roll参数不生效,要将该参数设置为1;否则HDFS文件所在块的复制会引起文件滚动
a3.sinks.k3.hdfs.minBlockReplicas = 1
# Use a channel which buffers events in memory
a3.channels.c3.type = memory
a3.channels.c3.capacity = 1000
a3.channels.c3.transactionCapacity = 100
# Bind the source and sink to the channel
a3.sources.r3.channels = c3
a3.sinks.k3.channel = c3
data:image/s3,"s3://crabby-images/a9512/a951238a780781211313672be068289c9d899afa" alt=""
data:image/s3,"s3://crabby-images/3507f/3507f8189dba0cf775dd0338635815c0cfc0be76" alt=""
flume启动命令:"./bin/flume-ng agent -c ./conf -f ./conf/taildir-hdfs.conf -n a3 -Dflume.root.logger=INFO,console";
用"ctrl+c"也可以停止当前运行的进程;
data:image/s3,"s3://crabby-images/8f8c1/8f8c1247f042a6f2ded4eeaffb57872d8dfdf39c" alt=""
日志已经写入hdfs;
data:image/s3,"s3://crabby-images/12ffc/12ffc6f012cd28b493f8fab67a34c374be0bf5f8" alt=""
注意:在flume1.10之后的版本,启动命令使用参数"-Dflume.root.logger=INFO,console",仍无法在控制台打印日志,主要原因是:Flume从1.10版本开始,使用Log4j 2.x替换Log4j 1.x版本,使用log4j2.xml替换log4j.properties。
data:image/s3,"s3://crabby-images/5715d/5715ddb4eb9564357a9b718a7618ddab511d6dae" alt=""
网上有解决方法的文章。