bash
复制代码
===原始数据格式: 1条 (2*2)==》4个指标数据
[{"app":"aa","url":"www.1.com","metrics":[{"name":"cpu","value":11},{"name":"mem","value":1}]},
{"app":"bb","url":"www.2.com","metrics":[{"name":"cpu","value":12},{"name":"mem","value":2}]}]
====> 希望得到的数据格式 =》 4个指标数据单独为一行
1{
"name" => "cpu",
"app" => "aa",
"@timestamp" => 2024-11-22T05:57:29.671882657Z,
"url" => "www.1.com",
"value" => 11
}
2{
"name" => "mem",
"app" => "aa",
"@timestamp" => 2024-11-22T05:57:29.671882657Z,
"url" => "www.1.com",
"value" => 1
}
3{
"name" => "cpu",
"app" => "bb",
"@timestamp" => 2024-11-22T05:57:29.672029155Z,
"url" => "www.2.com",
"value" => 12
}
4{
"name" => "mem",
"app" => "bb",
"@timestamp" => 2024-11-22T05:57:29.672029155Z,
"url" => "www.2.com",
"value" => 2
}
bash
复制代码
[root@t1 test]# cat a.json
[{"app":"aa","url":"www.1.com","metrics":[{"name":"cpu","value":11},{"name":"mem","value":1}]},{"app":"bb","url":"www.2.com","metrics":[{"name":"cpu","value":12},{"name":"mem","value":2}]}]
[root@t1 test]# cat logstash-pipline.conf
# Pipeline: read a JSON array of app records from a file and emit one event
# per metric entry, with the parent record's app/url copied onto each event.
input {
# kafka {
# bootstrap_servers => "localhost:9092"
# topics => ["your_topic_name"]
# codec => "json"
# add_field => {
# "source" => "kafka"
# }
# }
file {
path => "/root/test/a.json" # path of the JSON source file
start_position => "beginning" # read from the start of the file (optional; default "end" only tails newly appended content)
codec => "json" # decode each line as JSON
sincedb_path => "/dev/null" # disable the sincedb file so the whole file is re-read on every run (optional; typically for debugging)
}
}
filter {
if [message] { ## split message: the big (outer) array — fallback when the codec left raw text in [message]
json {
source => "message"
target => "data_array"
}
split {
field => "data_array"
remove_field => "message"
}
# NOTE(review): after this split the record's fields live under [data_array]
# (e.g. [data_array][metrics]), so the top-level [metrics] split below would
# not find them on this path — confirm this branch against a real run.
}
split { ## split metrics: the small (inner) array — one event per metric element
field => "metrics"
}
mutate {
# Serialize the [metrics] hash into a string so the json filter below can
# re-parse it and land name/value at the top level of the event.
add_field => {"field1" => "%{metrics}"}
}
json {
source => "field1"
}
mutate {
remove_field => ["field1","metrics","log","host","@version"] # log, host, @version are fields added automatically by Logstash
}
}
output {
stdout { codec => rubydebug } # write events to the console, formatted with the rubydebug codec
}
[root@t1 test]# /data01/logstash-8.7.1/bin/logstash -f /root/test/logstash-pipline.conf