json文件
[{"name": "John", "age": 30}, {"name": "Jane", "age": 25}, {"name": "Bob", "age": 40}]
jsonl文件
{"name": "John", "age": 30}
{"name": "Jane", "age": 25}
{"name": "Bob", "age": 40}
python
import json
# Paths used by the JSON / JSONL examples below.
jsonl_file = "b.jsonl"
json_file = "a.json"
output_jsonl = "c.jsonl"

# ----- JSONL: one JSON object per line -----
# Read the input line by line and copy every record to output_jsonl.
with open(jsonl_file, "r", encoding="utf-8") as f_in, \
        open(output_jsonl, "w", encoding="utf-8") as f_out:
    for idx, line in enumerate(f_in):
        # Blank lines (e.g. a trailing empty line at end of file) are not
        # valid JSON documents and would make json.loads raise; skip them.
        if not line.strip():
            continue
        data = json.loads(line)
        # Examples of reading optional fields, falling back to "".
        question_text = data.get("question", "")
        image_dir = data.get("image", "")
        audio_dir = data.get("audio", "")
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        f_out.write(json.dumps(data, ensure_ascii=False) + "\n")  # write one JSONL record

# ----- JSON: the whole file is a single document -----
# Open only the input here; opening output_jsonl in "w" mode at this point
# would truncate the JSONL file written above.
with open(json_file, "r", encoding="utf-8") as f_in:
    data = json.load(f_in)

# Print and collect each element of the loaded document (list items, or the
# keys if the document is a JSON object).
t = []
for i in data:
    print(i)
    t.append(i)

# Write the collected elements back out as one JSON document, using the same
# UTF-8 / ensure_ascii=False settings as the JSONL output.
with open("data.json", "w", encoding="utf-8") as f:
    json.dump(t, f, ensure_ascii=False)
txt文件读写
python
# Read a text file line by line; enumerate yields (0-based index, line) pairs.
# An explicit encoding avoids depending on the platform default.
with open('f.txt', 'r', encoding='utf-8') as file:
    # Alternative: file.read() returns the whole content as one string.
    for i, j in enumerate(file):
        # Each line keeps its trailing newline; strip it so print does not
        # emit an extra blank line after every line.
        print(i, j.rstrip('\n'))

# Write text ('w' creates the file or overwrites any existing content).
with open('file.txt', 'w', encoding='utf-8') as file:
    file.write('Hello, world!')
excel文件
python
import pandas as pd
# ===== Excel read / write =====
# Load the 'train' sheet; header=2 takes the column labels from the row at
# 0-based position 2.
excel_source = 'data/Data_Dictionary.xlsx'
df_excel = pd.read_excel(excel_source, sheet_name='train', header=2)

# Walk the frame row by row, printing each row as a plain dict.
for row_label, record in df_excel.iterrows():
    # A single column can be read with record['column_name'].
    as_dict = record.to_dict()
    print(row_label, as_dict)

# Save the frame to a new workbook, leaving out the index column.
excel_target = 'data/output.xlsx'
df_excel.to_excel(excel_target, index=False)
csv文件
python
import pandas as pd
# ===== CSV read / write =====
# header=0 takes the column labels from the first row of the file.
csv_source = 'data/sample_submission.csv'
df_csv = pd.read_csv(csv_source, header=0)

# Walk the frame row by row, printing each row as a plain dict.
for row_label, record in df_csv.iterrows():
    as_dict = record.to_dict()
    print(row_label, as_dict)

# Show the first 5 rows.
preview = df_csv.head(5)
print(preview)

# Save the frame to a new CSV file, leaving out the index column.
csv_target = 'data/output.csv'
df_csv.to_csv(csv_target, index=False)