二百四十四、Hive——Hive中解析复杂JSON,既有对象还有数组,而且数组中包含数组

一、目的

由于协议变更,新的原始数据JSON结构更加复杂(对象中嵌套数组,数组中又嵌套数组),在Hive中解析的难度明显增大。折腾了一天,多亏同事分享了一篇知乎文章,问题才终于得以解决,实属不易。

二、数据协议案例

{
"deviceNo": "39",
"sourceDeviceType": null,
"sn": null,
"model": null,
"createTime": "2024-07-16 07:30:00",
"data": {
"cycle": 300,
"sectionList": [{
"sectionNo": 1,
"coilList": [{
"laneNo": 1,
"laneType": null,
"coilNo": 1,
"volumeSum": 2,
"volumePerson": 0,
"volumeCarNon": 0,
"volumeCarSmall": 2,
"volumeCarMiddle": 0,
"volumeCarBig": 0,
"speedAvg": 29.65,
"timeOccupancy": 0.63,
"averageHeadway": 154.79,
"averageGap": 153.49,
"speed85": 40.0

},

{

"laneNo": 2,

"laneType": null,

"coilNo": 2,

"volumeSum": 5,

"volumePerson": 0,

"volumeCarNon": 0,

"volumeCarSmall": 5,

"volumeCarMiddle": 0,

"volumeCarBig": 0,

"speedAvg": 23.35,

"timeOccupancy": 2.99,

"averageHeadway": 123.27,

"averageGap": 121.08,

"speed85": 34.0

},

{

"laneNo": 3,

"laneType": null,

"coilNo": 3,

"volumeSum": 9,

"volumePerson": 0,

"volumeCarNon": 0,

"volumeCarSmall": 9,

"volumeCarMiddle": 0,

"volumeCarBig": 0,

"speedAvg": 26.22,

"timeOccupancy": 4.52,

"averageHeadway": 36.98,

"averageGap": 35.49,

"speed85": 36.0

},

{

"laneNo": 4,

"laneType": null,

"coilNo": 4,

"volumeSum": 10,

"volumePerson": 0,

"volumeCarNon": 0,

"volumeCarSmall": 10,

"volumeCarMiddle": 0,

"volumeCarBig": 0,

"speedAvg": 39.47,

"timeOccupancy": 2.69,

"averageHeadway": 34.73,

"averageGap": 33.78,

"speed85": 56.0

}]

},

{

"sectionNo": 2,

"coilList": [{

"laneNo": 5,

"laneType": null,

"coilNo": 5,

"volumeSum": 1,

"volumePerson": 0,

"volumeCarNon": 0,

"volumeCarSmall": 1,

"volumeCarMiddle": 0,

"volumeCarBig": 0,

"speedAvg": 32.74,

"timeOccupancy": 0.57,

"averageHeadway": 618.59,

"averageGap": 617.59,

"speed85": 32.74

},

{

"laneNo": 6,

"laneType": null,

"coilNo": 6,

"volumeSum": 3,

"volumePerson": 0,

"volumeCarNon": 0,

"volumeCarSmall": 3,

"volumeCarMiddle": 0,

"volumeCarBig": 0,

"speedAvg": 39.27,

"timeOccupancy": 0.37,

"averageHeadway": 125.1,

"averageGap": 124.26,

"speed85": 49.0

},

{

"laneNo": 7,

"laneType": null,

"coilNo": 7,

"volumeSum": 4,

"volumePerson": 0,

"volumeCarNon": 0,

"volumeCarSmall": 4,

"volumeCarMiddle": 0,

"volumeCarBig": 0,

"speedAvg": 49.15,

"timeOccupancy": 0.96,

"averageHeadway": 91.65,

"averageGap": 91.05,

"speed85": 54.0

},

{

"laneNo": 8,

"laneType": null,

"coilNo": 8,

"volumeSum": 1,

"volumePerson": 0,

"volumeCarNon": 0,

"volumeCarSmall": 1,

"volumeCarMiddle": 0,

"volumeCarBig": 0,

"speedAvg": 60.2,

"timeOccupancy": 0.17,

"averageHeadway": 50.3,

"averageGap": 49.7,

"speed85": 60.2

}]

}]

}

}

三、参考知乎文章链接

https://zhuanlan.zhihu.com/p/461838868

四、HiveSQL

1.首先,解析出第一层、第二层、第三层JSON

复制代码
-- Step 1: emit one row per element of data.sectionList, together with the
-- top-level fields of the message.
--
-- How the section array is split:
--   1. get_json_object returns the sectionList array as a JSON string.
--   2. Every [ and ] is deleted from that string (including the brackets of
--      the nested coilList arrays).
--   3. The boundary between two sections is marked by rewriting
--      },{"sectionNo"  to  }|{"sectionNo"  and the string is split on the pipe.
--
-- Because step 2 removes the coilList brackets, each section_list value is no
-- longer valid JSON. Before reading sectionNo the brackets are restored by
-- rewriting the first  :{  to  :[{  and the closing  }}  to  }]} .
--
-- Assumptions baked into the string handling (taken from the expressions below):
--   * sectionNo is the first key of every section object
--   * no value inside sectionList contains the characters [ ] or |
SELECT
    get_json_object(src.statistics_json, '$.deviceNo')         AS device_no,
    get_json_object(src.statistics_json, '$.sourceDeviceType') AS source_device_type,
    get_json_object(src.statistics_json, '$.sn')               AS sn,
    get_json_object(src.statistics_json, '$.model')            AS model,
    get_json_object(src.statistics_json, '$.createTime')       AS create_time,
    get_json_object(src.statistics_json, '$.data.cycle')       AS cycle,
    get_json_object(
        replace(replace(sec.section_list, ':{', ':[{'), '}}', '}]}'),
        '$.sectionNo'
    )                                                          AS section_no,
    sec.section_list                                           AS section_list
FROM hurys_dc_ods.ods_statistics src
LATERAL VIEW explode(
    split(
        replace(
            replace(
                replace(
                    get_json_object(src.statistics_json, '$.data.sectionList'),
                    '[', ''
                ),
                ']', ''
            ),
            '},{"sectionNo"', '}|{"sectionNo"'
        ),
        "\\|"
    )
) sec AS section_list
WHERE src.day = '2024-07-16'

2.然后,在第1步的基础上展开每个section中的coilList数组(展开后的别名为coil_list),解析出第四层JSON

复制代码
-- Step 2: emit one row per coil, i.e. one row per element of
-- data.sectionList[*].coilList, carrying the message-level and section-level
-- fields along with it.
--
-- Parsing approach:
--   * Only the outermost [ and ] of a JSON array string are trimmed (anchored
--     regexp_replace), so nested arrays and bracket characters inside string
--     values are left untouched.
--   * Arrays are split with look-around patterns that match just the comma
--     between two objects. Nothing is rewritten, so every element stays valid
--     JSON and can be read directly with get_json_object.
--   * Section boundary: a comma preceded by } and followed by {"sectionNo"
--     (assumes sectionNo is the first key of each section object).
--   * Coil boundary: a comma preceded by } and followed by {
--     (assumes coil objects are flat, with no nested objects or arrays).
--
-- Output columns and their order are the same as before.
WITH section_rows AS (
    -- One row per section. The section JSON is parsed once here and reused below.
    SELECT
        get_json_object(src.statistics_json, '$.deviceNo')         AS device_no,
        get_json_object(src.statistics_json, '$.sourceDeviceType') AS source_device_type,
        get_json_object(src.statistics_json, '$.sn')               AS sn,
        get_json_object(src.statistics_json, '$.model')            AS model,
        get_json_object(src.statistics_json, '$.createTime')       AS create_time,
        get_json_object(src.statistics_json, '$.data.cycle')       AS cycle,
        get_json_object(sec.section_json, '$.sectionNo')           AS section_no,
        get_json_object(sec.section_json, '$.coilList')            AS coil_list_json
    FROM hurys_dc_ods.ods_statistics src
    LATERAL VIEW explode(
        split(
            regexp_replace(
                get_json_object(src.statistics_json, '$.data.sectionList'),
                '^\\[|\\]$',
                ''
            ),
            '(?<=\\}),(?=\\{"sectionNo")'
        )
    ) sec AS section_json
    WHERE src.day = '2024-07-16'
)
SELECT
    s.device_no                                          AS device_no,
    s.source_device_type                                 AS source_device_type,
    s.sn                                                 AS sn,
    s.model                                              AS model,
    s.create_time                                        AS create_time,
    s.cycle                                              AS cycle,
    get_json_object(c.coil_json, '$.laneNo')             AS lane_no,
    get_json_object(c.coil_json, '$.laneType')           AS lane_type,
    s.section_no                                         AS section_no,
    get_json_object(c.coil_json, '$.coilNo')             AS coil_no,
    get_json_object(c.coil_json, '$.volumeSum')          AS volume_sum,
    get_json_object(c.coil_json, '$.volumePerson')       AS volume_person,
    get_json_object(c.coil_json, '$.volumeCarNon')       AS volume_car_non,
    get_json_object(c.coil_json, '$.volumeCarSmall')     AS volume_car_small,
    get_json_object(c.coil_json, '$.volumeCarMiddle')    AS volume_car_middle,
    get_json_object(c.coil_json, '$.volumeCarBig')       AS volume_car_big,
    get_json_object(c.coil_json, '$.speedAvg')           AS speed_avg,
    get_json_object(c.coil_json, '$.speed85')            AS speed_85,
    get_json_object(c.coil_json, '$.timeOccupancy')      AS time_occupancy,
    get_json_object(c.coil_json, '$.averageHeadway')     AS average_headway,
    get_json_object(c.coil_json, '$.averageGap')         AS average_gap,
    -- createTime is formatted as yyyy-MM-dd HH:mm:ss in the sample message,
    -- so the first 10 characters are the calendar date
    substr(s.create_time, 1, 10)                         AS day
FROM section_rows s
LATERAL VIEW explode(
    split(
        regexp_replace(s.coil_list_json, '^\\[|\\]$', ''),
        '(?<=\\}),(?=\\{)'
    )
) c AS coil_json
-- An empty coilList trims to an empty string, which split turns into a single
-- empty element. Drop it so a section without coils produces no coil rows.
WHERE c.coil_json <> '';

3.运行SQL,验证一下

终于解决了,终于解决了!!!

相关推荐
NiceCloud喜云1 小时前
Claude Code 跑 HyperFrames 实测:本地生成 AI 视频素材全流程
java·运维·人工智能·自动化·json·音视频·飞书
逍遥德2 小时前
PostgreSQL --- JSON 函数详解
数据库·sql·postgresql·json
輕華4 小时前
Flask_GET请求与JSON响应实战详解
python·flask·json
NiceCloud喜云15 小时前
Claude Code Routines 实战:三种触发器跑通云端自动化编码
android·运维·数据库·人工智能·自动化·json·飞书
海兰16 小时前
Kibana Dashboard as Code:Elastic 9.4 如何用 Terraform 和类型化 API 终结“JSON 垃圾袋“
云原生·json·terraform
前网易架构师-高司机1 天前
带标注的交警识别数据集,可识别交警和非交警,5587张图,支持yolo,coco json,voc xml,文末有模型训练代码
xml·yolo·json·数据集·交警
●VON1 天前
鸿蒙Flutter实战:放弃sqflite选纯Dart JSON文件存储
flutter·华为·json·harmonyos·鸿蒙
MageGojo1 天前
给起名工具接入八字起名 API:参数设计、JSON 示例和应用场景
json·apache
jieyucx1 天前
Go 语言 JSON 序列化/反序列化:Tag 用法完全指南
开发语言·golang·json·序列化·tag
jieyucx2 天前
Go 语言 JSON 序列化与反序列化
开发语言·golang·json·序列化