nc数据补值
我的 NC 数据右上角存在缺失值,准备使用 IDW(反距离加权)插值进行填补。上图为补值前,下图为补值后:
整体逻辑为:
代码:
python
import numpy as np
from netCDF4 import Dataset
from scipy.spatial import cKDTree
import os
def fill_missing_idw(variable, lat, lon, power=2, max_neighbors=8):
    """Fill the missing cells of a 2-D field by inverse-distance weighting.

    NOTE(review): a second ``fill_missing_idw`` with the same signature is
    defined later in this file and replaces this one when the module loads.

    Missing cells are the masked cells of a ``numpy.ma.MaskedArray`` or the
    NaN cells of a plain array. Each missing cell receives the weighted mean
    of its nearest known cells, with weights ``1 / distance**power``.
    Distances are Euclidean distances between ``(lon, lat)`` pairs.

    Parameters
    ----------
    variable : 2-D ndarray or numpy.ma.MaskedArray
        Field to fill. It is not modified.
    lat, lon : 2-D arrays
        Coordinates of every cell, same shape as ``variable``.
    power : float
        Exponent applied to the distance in the IDW weight.
    max_neighbors : int
        Upper bound on the number of known cells used per missing cell.
        It is clamped to the number of known cells that actually exist.

    Returns
    -------
    Same type as ``variable``
        A copy with the missing cells filled. A cell whose weights all
        vanish (and which has no coincident known point) is left missing.
    """
    filled_variable = variable.copy()

    if isinstance(variable, np.ma.MaskedArray):
        # getmaskarray() always returns a full boolean array, even when the
        # mask is the scalar ``nomask`` (nothing masked at all).
        missing_mask = np.ma.getmaskarray(variable)
        known_mask = ~missing_mask
        known_values = variable.data[known_mask]
    else:
        missing_mask = np.isnan(variable)
        known_mask = ~missing_mask
        known_values = variable[known_mask]

    known_points = np.column_stack((lon[known_mask], lat[known_mask]))
    missing_points = np.column_stack((lon[missing_mask], lat[missing_mask]))

    if len(known_points) == 0:
        print("没有可用的已知点进行插值。")
        return filled_variable
    if len(missing_points) == 0:
        # Nothing to interpolate; keep the usual debug line for consistency.
        print("尝试填补了 0 个缺失点,成功填补了 0 个。")
        return filled_variable

    # Never ask for more neighbours than exist: cKDTree pads the result with
    # distance == inf and index == len(known_points), and that index would be
    # out of bounds for known_values.
    k = min(max_neighbors, len(known_points))

    tree = cKDTree(known_points)
    # ``workers`` is the current name of the parallelism keyword (the old
    # ``n_jobs`` spelling is no longer accepted by recent SciPy releases).
    distances, indexes = tree.query(missing_points, k=k, workers=-1)
    if k == 1:
        # With k == 1 the query returns 1-D arrays; restore the neighbour axis.
        distances = distances[:, np.newaxis]
        indexes = indexes[:, np.newaxis]

    with np.errstate(divide='ignore', over='ignore'):
        weights = 1.0 / distances ** power
    # A zero distance gives an infinite weight; such rows are resolved by the
    # exact-match assignment below.
    weights[~np.isfinite(weights)] = 0.0

    weight_sums = weights.sum(axis=1)
    weighted_values = (weights * known_values[indexes]).sum(axis=1)
    exact_matches = distances[:, 0] == 0
    usable = weight_sums > 0

    # Rows with no usable weight stay NaN here and are not written back.
    interpolated_values = np.full(len(missing_points), np.nan)
    np.divide(weighted_values, weight_sums, out=interpolated_values, where=usable)
    interpolated_values[exact_matches] = known_values[indexes[exact_matches, 0]]

    # Write back only the cells that really got a value, so unresolved cells
    # remain masked / NaN instead of receiving a fabricated number.
    resolved = exact_matches | usable
    fill_mask = np.zeros(missing_mask.shape, dtype=bool)
    fill_mask[missing_mask] = resolved
    filled_variable[fill_mask] = interpolated_values[resolved]

    # Debug information
    if isinstance(filled_variable, np.ma.MaskedArray):
        n_filled = np.count_nonzero(~np.ma.getmaskarray(filled_variable)[missing_mask])
    else:
        n_filled = np.count_nonzero(~np.isnan(filled_variable[missing_mask]))
    print(f"尝试填补了 {len(missing_points)} 个缺失点,成功填补了 {n_filled} 个。")
    return filled_variable
def fill_missing_idw(variable, lat, lon, power=2, max_neighbors=8):
    """Fill the missing cells of a 2-D field by inverse-distance weighting.

    This definition overrides the earlier ``fill_missing_idw`` that appears
    above it in this file.

    Missing cells are the masked cells of a ``numpy.ma.MaskedArray`` or the
    NaN cells of a plain array. Each missing cell receives the weighted mean
    of its nearest known cells, with weights ``1 / distance**power``.
    Distances are Euclidean distances between ``(lon, lat)`` pairs.

    Parameters
    ----------
    variable : 2-D ndarray or numpy.ma.MaskedArray
        Field to fill. It is not modified.
    lat, lon : 2-D arrays
        Coordinates of every cell, same shape as ``variable``.
    power : float
        Exponent applied to the distance in the IDW weight.
    max_neighbors : int
        Upper bound on the number of known cells used per missing cell.
        It is clamped to the number of known cells that actually exist.

    Returns
    -------
    Same type as ``variable``
        A copy with the missing cells filled. A cell whose weights all
        vanish (and which has no coincident known point) is left missing
        rather than being set to an arbitrary value.
    """
    filled_variable = variable.copy()

    if isinstance(variable, np.ma.MaskedArray):
        # getmaskarray() always returns a full boolean array, even when the
        # mask is the scalar ``nomask`` (nothing masked at all).
        missing_mask = np.ma.getmaskarray(variable)
        known_mask = ~missing_mask
        known_values = variable.data[known_mask]
    else:
        missing_mask = np.isnan(variable)
        known_mask = ~missing_mask
        known_values = variable[known_mask]

    known_points = np.column_stack((lon[known_mask], lat[known_mask]))
    missing_points = np.column_stack((lon[missing_mask], lat[missing_mask]))

    if len(known_points) == 0:
        print("没有可用的已知点进行插值。")
        return filled_variable
    if len(missing_points) == 0:
        # Nothing to interpolate; keep the usual debug line for consistency.
        print("尝试填补了 0 个缺失点,成功填补了 0 个。")
        return filled_variable

    # Never ask for more neighbours than exist: cKDTree pads the result with
    # distance == inf and index == len(known_points), and that index would be
    # out of bounds for known_values.
    k = min(max_neighbors, len(known_points))

    tree = cKDTree(known_points)
    distances, indexes = tree.query(missing_points, k=k)
    if k == 1:
        # With k == 1 the query returns 1-D arrays; restore the neighbour axis.
        distances = distances[:, np.newaxis]
        indexes = indexes[:, np.newaxis]

    with np.errstate(divide='ignore', over='ignore'):
        weights = 1.0 / distances ** power
    # A zero distance gives an infinite weight; such rows are resolved by the
    # exact-match assignment below.
    weights[~np.isfinite(weights)] = 0.0

    weight_sums = weights.sum(axis=1)
    weighted_values = (weights * known_values[indexes]).sum(axis=1)
    exact_matches = distances[:, 0] == 0
    usable = weight_sums > 0

    # Divide only where the weight sum is positive. Replacing a zero sum by 1
    # would turn an unresolvable cell into a made-up 0.0.
    interpolated_values = np.full(len(missing_points), np.nan)
    np.divide(weighted_values, weight_sums, out=interpolated_values, where=usable)
    # A missing cell that coincides with a known point takes its value as is.
    interpolated_values[exact_matches] = known_values[indexes[exact_matches, 0]]

    # Write back only the cells that really got a value, so unresolved cells
    # remain masked / NaN and show up in the debug count below.
    resolved = exact_matches | usable
    fill_mask = np.zeros(missing_mask.shape, dtype=bool)
    fill_mask[missing_mask] = resolved
    filled_variable[fill_mask] = interpolated_values[resolved]

    # Debug information
    if isinstance(filled_variable, np.ma.MaskedArray):
        n_filled = np.count_nonzero(~np.ma.getmaskarray(filled_variable)[missing_mask])
    else:
        n_filled = np.count_nonzero(~np.isnan(filled_variable[missing_mask]))
    print(f"尝试填补了 {len(missing_points)} 个缺失点,成功填补了 {n_filled} 个。")
    return filled_variable
def _fill_time_steps(data, lat, lon, var_name):
    """Return a masked copy of 3-D ``data`` with every slice along axis 0 IDW-filled."""
    # copy=True keeps the array read from the source variable untouched.
    filled_data = np.ma.array(data, copy=True)
    for t in range(data.shape[0]):
        print(f"Processing time index {t} for variable {var_name}")
        var_data = data[t, :, :]
        # fill_missing_idw is handed a masked array; convert NaNs to a mask
        # when the slice came back as a plain ndarray.
        if not isinstance(var_data, np.ma.MaskedArray):
            var_data = np.ma.masked_where(np.isnan(var_data), var_data)
        filled_data[t, :, :] = fill_missing_idw(var_data, lat, lon)
    return filled_data


def process_nc_file(input_path, output_path,
                    target_variables=('T2D', 'Q2D', 'U2D', 'V2D', 'PSFC',
                                      'RAINRATE', 'SWDOWN', 'LWDOWN'),
                    lat_name='lat', lon_name='lon'):
    """Copy a NetCDF file, IDW-filling the missing cells of selected variables.

    Global attributes, dimensions and all variables are copied from
    ``input_path`` to a new file at ``output_path``. Variables listed in
    ``target_variables`` are gap-filled slice by slice along their first
    axis with ``fill_missing_idw``; every other variable is copied unchanged.

    Parameters
    ----------
    input_path : str
        Path of the NetCDF file to read.
    output_path : str
        Path of the NetCDF file to create (opened in ``'w'`` mode).
    target_variables : iterable of str
        Names of the variables to gap-fill. Defaults to the set that was
        previously hard-coded.
    lat_name, lon_name : str
        Names of the coordinate variables passed to ``fill_missing_idw``.
        NOTE(review): they are assumed to be 2-D and to match the last two
        dimensions of each target variable — confirm for your files.

    Raises
    ------
    ValueError
        If a target variable is not 3-dimensional.
    """
    targets = set(target_variables)
    coords = None  # (lat, lon), read once on first use

    with Dataset(input_path, 'r') as src:
        with Dataset(output_path, 'w', format=src.file_format) as dst:
            # Copy global attributes
            dst.setncatts({attr: src.getncattr(attr) for attr in src.ncattrs()})

            # Copy dimensions (unlimited dimensions stay unlimited)
            for dim_name, dim in src.dimensions.items():
                dst.createDimension(dim_name, (len(dim) if not dim.isunlimited() else None))

            # Copy variables, gap-filling the selected ones
            for var_name, var in src.variables.items():
                fill_value = getattr(var, '_FillValue', None)
                dst_var = dst.createVariable(var_name, var.datatype, var.dimensions,
                                             fill_value=fill_value)
                # _FillValue is already set through ``fill_value`` above, which
                # is where netCDF4 expects it to be defined, so do not send it
                # a second time with the other attributes.
                dst_var.setncatts({attr: var.getncattr(attr)
                                   for attr in var.ncattrs()
                                   if attr != '_FillValue'})

                data = var[:]

                if var_name not in targets:
                    # Other variables are copied as they are
                    dst_var[:] = data
                    continue

                # Only [Time, south_north, west_east]-style 3-D data is handled
                if data.ndim != 3:
                    raise ValueError(f"Variable {var_name} 有意外的维度。")

                if coords is None:
                    coords = (src.variables[lat_name][:], src.variables[lon_name][:])
                lat, lon = coords

                filled_data = _fill_time_steps(data, lat, lon, var_name)

                # Cells that are still masked are written as _FillValue
                # (or NaN when the source variable defines none).
                fill_val = fill_value if fill_value is not None else np.nan
                dst_var[:] = filled_data.filled(fill_val)

    print(f"填补后的数据已保存到 {output_path}")
if __name__ == "__main__":
    # Script entry point: gap-fill one fixed input file into a new output file.
    input_nc = r"aaa.nc"
    output_nc = r"aaa_filled.nc"
    # Run the processing only when the input file is present; otherwise report it.
    if os.path.exists(input_nc):
        process_nc_file(input_nc, output_nc)
    else:
        print(f"输入文件 {input_nc} 不存在。")