这里直接把 JSON 文件转成 Excel;对于嵌套过深的复杂结构,只转换顶部几层。
主要功能包括:1) 处理嵌套结构和复杂值(字典/列表);2) 支持数字和字符串键的排序;3) 特殊字段(如vv__objid和_TID_)优先排列;4) 可处理key-val格式的JSON数据。脚本通过递归处理JSON数据,保留顶层结构,并将复杂值序列化为JSON字符串,最终生成包含所有数据的Excel工作表。
使用方式为:python json_to_xlsx.py <json文件> <xlsx文件>。
python
import sys
import json
import csv
import io
from xlsxwriter.workbook import Workbook
def to_string(s):
    """Return *s* converted to text, keeping encoding in mind.

    ``str(s)`` is tried first. If that fails with a Unicode error (for
    example non-ASCII unicode text under Python 2), the UTF-8 encoded form
    of *s* is returned instead.
    """
    try:
        return str(s)
    except UnicodeError:
        # Only encoding failures are recoverable here; anything else should
        # propagate. Change the encoding type if the data is not UTF-8.
        return s.encode('utf-8')
def reduce_item(key, value):
    """Flatten *value* into the module-level ``reduced_item`` dict.

    A list contributes one entry per element under ``<key>_<index>``. A dict
    contributes one entry per member under ``<key>_<member>``, or just
    ``<member>`` when *key* is the empty string. Any other value is stored
    as text under *key*. Empty lists and dicts add nothing.
    """
    global reduced_item
    kind = type(value)
    if kind is list:
        # Each element is flattened under the parent key plus its position.
        for position, element in enumerate(value):
            reduce_item(key + '_' + to_string(position), element)
    elif kind is dict:
        for member in value.keys():
            member_name = to_string(member)
            # Members of a dict reached with an empty key get no prefix.
            child_key = member_name if key == "" else key + '_' + member_name
            reduce_item(child_key, value[member])
    else:
        # Leaf value: record it as text under the accumulated key.
        reduced_item[to_string(key)] = to_string(value)
def merge_headers(header, keys):
    """Append to *header*, in place, every entry of *keys* it does not hold.

    Entries are compared with ``==``. The order of first appearance is
    preserved and nothing is returned.
    """
    for candidate in keys:
        already_listed = any(existing == candidate for existing in header)
        if not already_listed:
            header.append(candidate)
def convert_dict_to_array(data):
    """Turn a non-empty dict into a list of row dicts, one per entry.

    Entries are ordered numerically when every key is accepted by ``int()``,
    otherwise by the default ordering of the keys. Each row starts with a
    ``_TID_`` field holding the original key; a dict value contributes its
    own fields after it, any other value is stored under ``value``.

    Anything that is not a non-empty dict is returned unchanged.
    """
    if not isinstance(data, dict) or len(data) == 0:
        return data

    def _int_like(candidate):
        # True when int() accepts the key.
        try:
            int(candidate)
        except (ValueError, TypeError):
            return False
        return True

    if all(_int_like(k) for k in data.keys()):
        # Numeric keys: sort by integer value.
        ordered_keys = sorted(data.keys(), key=int)
    else:
        # Otherwise fall back to plain key ordering.
        ordered_keys = sorted(data.keys())

    rows = []
    for tid in ordered_keys:
        entry = data[tid]
        # _TID_ is inserted first so it leads the row's field order.
        row = {'_TID_': tid}
        if isinstance(entry, dict):
            # The fields are copied into a fresh dict, so the input is not
            # modified; a '_TID_' field already in entry overrides the key.
            row.update(entry)
        else:
            # Non-dict values are wrapped under a 'value' field.
            row['value'] = entry
        rows.append(row)
    return rows
def serialize_complex_values(data):
    """Replace dict/list field values in a list of rows with JSON text.

    For every dict element of *data*, a new dict is built in which each
    value that is itself a dict or list is replaced by its compact JSON
    string (non-ASCII characters kept as-is); other values are carried over
    untouched. Elements that are not dicts are passed through, and a *data*
    that is not a list is returned unchanged.
    """
    if not isinstance(data, list):
        return data

    def _as_cell(value):
        # Only containers are serialized; simple values stay as they are.
        if isinstance(value, (dict, list)):
            return json.dumps(value, ensure_ascii=False, separators=(',', ':'))
        return value

    return [
        {field: _as_cell(value) for field, value in row.items()}
        if isinstance(row, dict) else row
        for row in data
    ]
if __name__ == "__main__":
    # Command-line entry point: read a JSON file, flatten it into rows and
    # write those rows to the first worksheet of a new xlsx workbook.
    #
    # This code intentionally stays at module level: reduce_item() stores its
    # output in the module-level name `reduced_item`, which is rebound below.
    if len(sys.argv) != 3:
        print ("\nUsage: python json_to_xlsx.py <json_in_file_path> <xlsxfile>\n")
        # Report misuse with a non-zero status, like the other error paths.
        sys.exit(1)

    # Reading arguments
    json_file_path = sys.argv[1]
    xlsxfile = sys.argv[2]
    print("converting " + json_file_path + "\n")

    # 'utf-8-sig' drops a leading BOM if the file has one.
    with io.open(json_file_path, 'r', encoding='utf-8-sig') as fp:
        json_value = fp.read()
    raw_data = json.loads(json_value)

    # Detect the key-val layout: 'key' holds the column names and 'val' the
    # rows, each row being a sequence of cell values.
    if isinstance(raw_data, dict) and 'key' in raw_data and 'val' in raw_data:
        keys = raw_data['key']
        values = raw_data['val']
        converted_data = []
        for row in values:
            # Rows shorter than the key list are padded with None.
            row_dict = {
                key: (row[i] if i < len(row) else None)
                for i, key in enumerate(keys)
            }
            converted_data.append(row_dict)
        data_to_be_processed = converted_data
    else:
        data_to_be_processed = raw_data

    # Turn a top-level dict into a list of rows (numeric or string keys).
    data_to_be_processed = convert_dict_to_array(data_to_be_processed)
    # Serialize nested dict/list field values to JSON strings.
    data_to_be_processed = serialize_complex_values(data_to_be_processed)

    # The data must be a list of rows by now.
    if not isinstance(data_to_be_processed, list):
        print("错误:数据处理后仍不是数组格式")
        sys.exit(1)

    processed_data = []
    header = []
    for item in data_to_be_processed:
        # A fresh dict must be bound to `reduced_item` before each call,
        # because reduce_item() fills whatever that global refers to.
        reduced_item = {}
        reduce_item("", item)
        merge_headers(header, reduced_item.keys())
        processed_data.append(reduced_item)

    # Nothing to write: stop with an error.
    if not processed_data:
        print("错误:没有有效数据可处理,程序退出")
        sys.exit(1)

    # Reorder the header so that vv__objid comes first and _TID_ second
    # (each only when present); the remaining columns keep their order.
    priority_columns = [
        name for name in ("vv__objid", "_TID_") if name in header
    ]
    header = priority_columns + [
        name for name in header if name not in priority_columns
    ]

    # The context manager closes the workbook even if a write fails.
    with Workbook(xlsxfile) as workbook:
        worksheet = workbook.add_worksheet()
        # Row 0 carries the column titles.
        for col_index, title in enumerate(header):
            worksheet.write(0, col_index, title)
        # Data rows start at row 1; a field missing from a row is written
        # as an empty string.
        for row_index, row in enumerate(processed_data, start=1):
            for col_index, title in enumerate(header):
                worksheet.write(row_index, col_index, row.get(title, ""))

    print ("Just completed writing xlsx file with %d columns" % len(header))